Pedantic polish to aes-ia64 and sha512-ia64.
This commit is contained in:
parent
165a28abae
commit
0066590f98
@ -24,7 +24,9 @@
|
|||||||
|
|
||||||
rk0=r8; rk1=r9;
|
rk0=r8; rk1=r9;
|
||||||
|
|
||||||
prsave=r10;
|
pfssave=r2;
|
||||||
|
lcsave=r10;
|
||||||
|
prsave=r3;
|
||||||
maskff=r11;
|
maskff=r11;
|
||||||
twenty4=r14;
|
twenty4=r14;
|
||||||
sixteen=r15;
|
sixteen=r15;
|
||||||
@ -67,6 +69,9 @@ te0=r40; te1=r41; te2=r42; te3=r43;
|
|||||||
// Clobber: r16-r31,rk0-rk1,r32-r43
|
// Clobber: r16-r31,rk0-rk1,r32-r43
|
||||||
.align 32
|
.align 32
|
||||||
_ia64_AES_encrypt:
|
_ia64_AES_encrypt:
|
||||||
|
.prologue
|
||||||
|
.altrp b6
|
||||||
|
.body
|
||||||
{ .mmi; alloc r16=ar.pfs,12,0,0,8
|
{ .mmi; alloc r16=ar.pfs,12,0,0,8
|
||||||
LDKEY t0=[rk0],2*KSZ
|
LDKEY t0=[rk0],2*KSZ
|
||||||
mov pr.rot=1<<16 }
|
mov pr.rot=1<<16 }
|
||||||
@ -179,20 +184,21 @@ _ia64_AES_encrypt:
|
|||||||
.skip 16
|
.skip 16
|
||||||
AES_encrypt:
|
AES_encrypt:
|
||||||
.prologue
|
.prologue
|
||||||
.save ar.pfs,r2
|
.save ar.pfs,pfssave
|
||||||
{ .mmi; alloc r2=ar.pfs,3,0,12,0
|
{ .mmi; alloc pfssave=ar.pfs,3,0,12,0
|
||||||
addl out8=@ltoff(AES_Te#),gp
|
and out0=3,in0
|
||||||
.save ar.lc,r3
|
mov r3=ip }
|
||||||
mov r3=ar.lc }
|
{ .mmi; ADDP in0=0,in0
|
||||||
{ .mmi; and out0=3,in0
|
ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
|
||||||
ADDP in0=0,in0
|
.save ar.lc,lcsave
|
||||||
ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
|
mov lcsave=ar.lc };;
|
||||||
|
|
||||||
.body
|
{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
|
||||||
{ .mmi; ld8 out8=[out8] // Te0
|
add out8=(AES_Te#-AES_encrypt#),r3 // Te0
|
||||||
ld4 out11=[out11] // AES_KEY->rounds
|
.save pr,prsave
|
||||||
mov prsave=pr }
|
mov prsave=pr }
|
||||||
|
|
||||||
|
.body
|
||||||
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
|
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
|
||||||
{ .mib; cmp.ne p6,p0=out0,r0
|
{ .mib; cmp.ne p6,p0=out0,r0
|
||||||
add out0=4,in0
|
add out0=4,in0
|
||||||
@ -215,8 +221,8 @@ AES_encrypt:
|
|||||||
ADDP in1=0,in1
|
ADDP in1=0,in1
|
||||||
(p6) br.spnt .Le_o_unaligned };;
|
(p6) br.spnt .Le_o_unaligned };;
|
||||||
|
|
||||||
{ .mii; mov ar.pfs=r2
|
{ .mii; mov ar.pfs=pfssave
|
||||||
mov ar.lc=r3 }
|
mov ar.lc=lcsave }
|
||||||
{ .mmi; st4 [in1]=r16,8 // s0
|
{ .mmi; st4 [in1]=r16,8 // s0
|
||||||
st4 [in0]=r20,8 // s1
|
st4 [in0]=r20,8 // s1
|
||||||
mov pr=prsave,0x1ffff };;
|
mov pr=prsave,0x1ffff };;
|
||||||
@ -299,10 +305,10 @@ AES_encrypt:
|
|||||||
mov pr=prsave,0x1ffff }//;;
|
mov pr=prsave,0x1ffff }//;;
|
||||||
{ .mmi; st1 [out1]=r26,4
|
{ .mmi; st1 [out1]=r26,4
|
||||||
st1 [out0]=r27,4
|
st1 [out0]=r27,4
|
||||||
mov ar.pfs=r2 };;
|
mov ar.pfs=pfssave };;
|
||||||
{ .mmi; st1 [out3]=r28
|
{ .mmi; st1 [out3]=r28
|
||||||
st1 [out2]=r29
|
st1 [out2]=r29
|
||||||
mov ar.lc=r3 }//;;
|
mov ar.lc=lcsave }//;;
|
||||||
{ .mmb; st1 [out1]=r30
|
{ .mmb; st1 [out1]=r30
|
||||||
st1 [out0]=r31
|
st1 [out0]=r31
|
||||||
br.ret.sptk.many b0 };;
|
br.ret.sptk.many b0 };;
|
||||||
@ -359,6 +365,9 @@ while(<>) {
|
|||||||
// Clobber: r16-r31,rk0-rk1,r32-r43
|
// Clobber: r16-r31,rk0-rk1,r32-r43
|
||||||
.align 32
|
.align 32
|
||||||
_ia64_AES_decrypt:
|
_ia64_AES_decrypt:
|
||||||
|
.prologue
|
||||||
|
.altrp b6
|
||||||
|
.body
|
||||||
{ .mmi; alloc r16=ar.pfs,12,0,0,8
|
{ .mmi; alloc r16=ar.pfs,12,0,0,8
|
||||||
LDKEY t0=[rk0],2*KSZ
|
LDKEY t0=[rk0],2*KSZ
|
||||||
mov pr.rot=1<<16 }
|
mov pr.rot=1<<16 }
|
||||||
@ -471,20 +480,21 @@ _ia64_AES_decrypt:
|
|||||||
.skip 16
|
.skip 16
|
||||||
AES_decrypt:
|
AES_decrypt:
|
||||||
.prologue
|
.prologue
|
||||||
.save ar.pfs,r2
|
.save ar.pfs,pfssave
|
||||||
{ .mmi; alloc r2=ar.pfs,3,0,12,0
|
{ .mmi; alloc pfssave=ar.pfs,3,0,12,0
|
||||||
addl out8=@ltoff(AES_Td#),gp
|
and out0=3,in0
|
||||||
.save ar.lc,r3
|
mov r3=ip }
|
||||||
mov r3=ar.lc }
|
{ .mmi; ADDP in0=0,in0
|
||||||
{ .mmi; and out0=3,in0
|
ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
|
||||||
ADDP in0=0,in0
|
.save ar.lc,lcsave
|
||||||
ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
|
mov lcsave=ar.lc };;
|
||||||
|
|
||||||
.body
|
{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
|
||||||
{ .mmi; ld8 out8=[out8] // Te0
|
add out8=(AES_Td#-AES_decrypt#),r3 // Td0
|
||||||
ld4 out11=[out11] // AES_KEY->rounds
|
.save pr,prsave
|
||||||
mov prsave=pr }
|
mov prsave=pr }
|
||||||
|
|
||||||
|
.body
|
||||||
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
|
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
|
||||||
{ .mib; cmp.ne p6,p0=out0,r0
|
{ .mib; cmp.ne p6,p0=out0,r0
|
||||||
add out0=4,in0
|
add out0=4,in0
|
||||||
@ -507,8 +517,8 @@ AES_decrypt:
|
|||||||
ADDP in1=0,in1
|
ADDP in1=0,in1
|
||||||
(p6) br.spnt .Ld_o_unaligned };;
|
(p6) br.spnt .Ld_o_unaligned };;
|
||||||
|
|
||||||
{ .mii; mov ar.pfs=r2
|
{ .mii; mov ar.pfs=pfssave
|
||||||
mov ar.lc=r3 }
|
mov ar.lc=lcsave }
|
||||||
{ .mmi; st4 [in1]=r16,8 // s0
|
{ .mmi; st4 [in1]=r16,8 // s0
|
||||||
st4 [in0]=r20,8 // s1
|
st4 [in0]=r20,8 // s1
|
||||||
mov pr=prsave,0x1ffff };;
|
mov pr=prsave,0x1ffff };;
|
||||||
@ -591,10 +601,10 @@ AES_decrypt:
|
|||||||
mov pr=prsave,0x1ffff }//;;
|
mov pr=prsave,0x1ffff }//;;
|
||||||
{ .mmi; st1 [out1]=r26,4
|
{ .mmi; st1 [out1]=r26,4
|
||||||
st1 [out0]=r27,4
|
st1 [out0]=r27,4
|
||||||
mov ar.pfs=r2 };;
|
mov ar.pfs=pfssave };;
|
||||||
{ .mmi; st1 [out3]=r28
|
{ .mmi; st1 [out3]=r28
|
||||||
st1 [out2]=r29
|
st1 [out2]=r29
|
||||||
mov ar.lc=r3 }//;;
|
mov ar.lc=lcsave }//;;
|
||||||
{ .mmb; st1 [out1]=r30
|
{ .mmb; st1 [out1]=r30
|
||||||
st1 [out0]=r31
|
st1 [out0]=r31
|
||||||
br.ret.sptk.many b0 };;
|
br.ret.sptk.many b0 };;
|
||||||
|
@ -110,6 +110,8 @@ $code=<<___;
|
|||||||
.explicit
|
.explicit
|
||||||
.text
|
.text
|
||||||
|
|
||||||
|
pfssave=r2;
|
||||||
|
lcsave=r3;
|
||||||
prsave=r14;
|
prsave=r14;
|
||||||
K=r15;
|
K=r15;
|
||||||
A=r16; B=r17; C=r18; D=r19;
|
A=r16; B=r17; C=r18; D=r19;
|
||||||
@ -128,20 +130,17 @@ sgm0=r50; sgm1=r51; // small constants
|
|||||||
.align 32
|
.align 32
|
||||||
$func:
|
$func:
|
||||||
.prologue
|
.prologue
|
||||||
.save ar.pfs,r2
|
.save ar.pfs,pfssave
|
||||||
{ .mmi; alloc r2=ar.pfs,3,17,0,16
|
{ .mmi; alloc pfssave=ar.pfs,3,17,0,16
|
||||||
$ADDP ctx=0,r32 // 1st arg
|
$ADDP ctx=0,r32 // 1st arg
|
||||||
.save ar.lc,r3
|
.save ar.lc,lcsave
|
||||||
mov r3=ar.lc }
|
mov lcsave=ar.lc }
|
||||||
{ .mmi; $ADDP input=0,r33 // 2nd arg
|
{ .mmi; $ADDP input=0,r33 // 2nd arg
|
||||||
addl Ktbl=\@ltoff($TABLE#),gp
|
mov num=r34 // 3rd arg
|
||||||
.save pr,prsave
|
.save pr,prsave
|
||||||
mov prsave=pr };;
|
mov prsave=pr };;
|
||||||
|
|
||||||
.body
|
.body
|
||||||
{ .mii; ld8 Ktbl=[Ktbl]
|
|
||||||
mov num=r34 };; // 3rd arg
|
|
||||||
|
|
||||||
{ .mib; add r8=0*$SZ,ctx
|
{ .mib; add r8=0*$SZ,ctx
|
||||||
add r9=1*$SZ,ctx
|
add r9=1*$SZ,ctx
|
||||||
brp.loop.imp .L_first16,.L_first16_ctop
|
brp.loop.imp .L_first16,.L_first16_ctop
|
||||||
@ -151,20 +150,23 @@ $func:
|
|||||||
brp.loop.imp .L_rest,.L_rest_ctop
|
brp.loop.imp .L_rest,.L_rest_ctop
|
||||||
};;
|
};;
|
||||||
// load A-H
|
// load A-H
|
||||||
|
.Lpic_point:
|
||||||
{ .mmi; $LDW A=[r8],4*$SZ
|
{ .mmi; $LDW A=[r8],4*$SZ
|
||||||
$LDW B=[r9],4*$SZ
|
$LDW B=[r9],4*$SZ
|
||||||
mov sgm0=$sigma0[2] }
|
mov Ktbl=ip }
|
||||||
{ .mmi; $LDW C=[r10],4*$SZ
|
{ .mmi; $LDW C=[r10],4*$SZ
|
||||||
$LDW D=[r11],4*$SZ
|
$LDW D=[r11],4*$SZ
|
||||||
mov sgm1=$sigma1[2] };;
|
mov sgm0=$sigma0[2] };;
|
||||||
{ .mmi; $LDW E=[r8]
|
{ .mmi; $LDW E=[r8]
|
||||||
$LDW F=[r9] }
|
$LDW F=[r9]
|
||||||
|
add Ktbl=($TABLE#-.Lpic_point),Ktbl }
|
||||||
{ .mmi; $LDW G=[r10]
|
{ .mmi; $LDW G=[r10]
|
||||||
$LDW H=[r11]
|
$LDW H=[r11]
|
||||||
cmp.ne p15,p14=0,r35 };; // used in sha256_block
|
cmp.ne p15,p14=0,r35 };; // used in sha256_block
|
||||||
|
|
||||||
.L_outer:
|
.L_outer:
|
||||||
{ .mii; mov ar.lc=15
|
{ .mii; mov sgm1=$sigma1[2]
|
||||||
|
mov ar.lc=15
|
||||||
mov ar.ec=1 };;
|
mov ar.ec=1 };;
|
||||||
.align 32
|
.align 32
|
||||||
.L_first16:
|
.L_first16:
|
||||||
@ -329,7 +331,7 @@ $code.=<<___;
|
|||||||
(p6) add Ktbl=-$SZ*$rounds,Ktbl }
|
(p6) add Ktbl=-$SZ*$rounds,Ktbl }
|
||||||
{ .mmi; $LDW r38=[r10],-4*$SZ
|
{ .mmi; $LDW r38=[r10],-4*$SZ
|
||||||
$LDW r39=[r11],-4*$SZ
|
$LDW r39=[r11],-4*$SZ
|
||||||
(p7) mov ar.lc=r3 };;
|
(p7) mov ar.lc=lcsave };;
|
||||||
{ .mmi; add A=A,r32
|
{ .mmi; add A=A,r32
|
||||||
add B=B,r33
|
add B=B,r33
|
||||||
add C=C,r34 }
|
add C=C,r34 }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user