diff --git a/crypto/aes/asm/aes-ia64.S b/crypto/aes/asm/aes-ia64.S index 337783800..c96668287 100644 --- a/crypto/aes/asm/aes-ia64.S +++ b/crypto/aes/asm/aes-ia64.S @@ -24,7 +24,9 @@ rk0=r8; rk1=r9; -prsave=r10; +pfssave=r2; +lcsave=r10; +prsave=r3; maskff=r11; twenty4=r14; sixteen=r15; @@ -67,6 +69,9 @@ te0=r40; te1=r41; te2=r42; te3=r43; // Clobber: r16-r31,rk0-rk1,r32-r43 .align 32 _ia64_AES_encrypt: + .prologue + .altrp b6 + .body { .mmi; alloc r16=ar.pfs,12,0,0,8 LDKEY t0=[rk0],2*KSZ mov pr.rot=1<<16 } @@ -179,20 +184,21 @@ _ia64_AES_encrypt: .skip 16 AES_encrypt: .prologue - .save ar.pfs,r2 -{ .mmi; alloc r2=ar.pfs,3,0,12,0 - addl out8=@ltoff(AES_Te#),gp - .save ar.lc,r3 - mov r3=ar.lc } -{ .mmi; and out0=3,in0 - ADDP in0=0,in0 - ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds + .save ar.pfs,pfssave +{ .mmi; alloc pfssave=ar.pfs,3,0,12,0 + and out0=3,in0 + mov r3=ip } +{ .mmi; ADDP in0=0,in0 + ADDP out11=KSZ*60,in2 // &AES_KEY->rounds + .save ar.lc,lcsave + mov lcsave=ar.lc };; - .body -{ .mmi; ld8 out8=[out8] // Te0 - ld4 out11=[out11] // AES_KEY->rounds +{ .mmi; ld4 out11=[out11] // AES_KEY->rounds + add out8=(AES_Te#-AES_encrypt#),r3 // Te0 + .save pr,prsave mov prsave=pr } + .body #if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... 
{ .mib; cmp.ne p6,p0=out0,r0 add out0=4,in0 @@ -215,8 +221,8 @@ AES_encrypt: ADDP in1=0,in1 (p6) br.spnt .Le_o_unaligned };; -{ .mii; mov ar.pfs=r2 - mov ar.lc=r3 } +{ .mii; mov ar.pfs=pfssave + mov ar.lc=lcsave } { .mmi; st4 [in1]=r16,8 // s0 st4 [in0]=r20,8 // s1 mov pr=prsave,0x1ffff };; @@ -299,10 +305,10 @@ AES_encrypt: mov pr=prsave,0x1ffff }//;; { .mmi; st1 [out1]=r26,4 st1 [out0]=r27,4 - mov ar.pfs=r2 };; + mov ar.pfs=pfssave };; { .mmi; st1 [out3]=r28 st1 [out2]=r29 - mov ar.lc=r3 }//;; + mov ar.lc=lcsave }//;; { .mmb; st1 [out1]=r30 st1 [out0]=r31 br.ret.sptk.many b0 };; @@ -359,6 +365,9 @@ while(<>) { // Clobber: r16-r31,rk0-rk1,r32-r43 .align 32 _ia64_AES_decrypt: + .prologue + .altrp b6 + .body { .mmi; alloc r16=ar.pfs,12,0,0,8 LDKEY t0=[rk0],2*KSZ mov pr.rot=1<<16 } @@ -471,20 +480,21 @@ _ia64_AES_decrypt: .skip 16 AES_decrypt: .prologue - .save ar.pfs,r2 -{ .mmi; alloc r2=ar.pfs,3,0,12,0 - addl out8=@ltoff(AES_Td#),gp - .save ar.lc,r3 - mov r3=ar.lc } -{ .mmi; and out0=3,in0 - ADDP in0=0,in0 - ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds + .save ar.pfs,pfssave +{ .mmi; alloc pfssave=ar.pfs,3,0,12,0 + and out0=3,in0 + mov r3=ip } +{ .mmi; ADDP in0=0,in0 + ADDP out11=KSZ*60,in2 // &AES_KEY->rounds + .save ar.lc,lcsave + mov lcsave=ar.lc };; - .body -{ .mmi; ld8 out8=[out8] // Te0 - ld4 out11=[out11] // AES_KEY->rounds +{ .mmi; ld4 out11=[out11] // AES_KEY->rounds + add out8=(AES_Td#-AES_decrypt#),r3 // Td0 + .save pr,prsave mov prsave=pr } + .body #if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... 
{ .mib; cmp.ne p6,p0=out0,r0 add out0=4,in0 @@ -507,8 +517,8 @@ AES_decrypt: ADDP in1=0,in1 (p6) br.spnt .Ld_o_unaligned };; -{ .mii; mov ar.pfs=r2 - mov ar.lc=r3 } +{ .mii; mov ar.pfs=pfssave + mov ar.lc=lcsave } { .mmi; st4 [in1]=r16,8 // s0 st4 [in0]=r20,8 // s1 mov pr=prsave,0x1ffff };; @@ -591,10 +601,10 @@ AES_decrypt: mov pr=prsave,0x1ffff }//;; { .mmi; st1 [out1]=r26,4 st1 [out0]=r27,4 - mov ar.pfs=r2 };; + mov ar.pfs=pfssave };; { .mmi; st1 [out3]=r28 st1 [out2]=r29 - mov ar.lc=r3 }//;; + mov ar.lc=lcsave }//;; { .mmb; st1 [out1]=r30 st1 [out0]=r31 br.ret.sptk.many b0 };; diff --git a/crypto/sha/asm/sha512-ia64.pl b/crypto/sha/asm/sha512-ia64.pl index 9de917499..628e33e42 100755 --- a/crypto/sha/asm/sha512-ia64.pl +++ b/crypto/sha/asm/sha512-ia64.pl @@ -110,6 +110,8 @@ $code=<<___; .explicit .text +pfssave=r2; +lcsave=r3; prsave=r14; K=r15; A=r16; B=r17; C=r18; D=r19; @@ -128,20 +130,17 @@ sgm0=r50; sgm1=r51; // small constants .align 32 $func: .prologue - .save ar.pfs,r2 -{ .mmi; alloc r2=ar.pfs,3,17,0,16 + .save ar.pfs,pfssave +{ .mmi; alloc pfssave=ar.pfs,3,17,0,16 $ADDP ctx=0,r32 // 1st arg - .save ar.lc,r3 - mov r3=ar.lc } + .save ar.lc,lcsave + mov lcsave=ar.lc } { .mmi; $ADDP input=0,r33 // 2nd arg - addl Ktbl=\@ltoff($TABLE#),gp + mov num=r34 // 3rd arg .save pr,prsave mov prsave=pr };; .body -{ .mii; ld8 Ktbl=[Ktbl] - mov num=r34 };; // 3rd arg - { .mib; add r8=0*$SZ,ctx add r9=1*$SZ,ctx brp.loop.imp .L_first16,.L_first16_ctop @@ -151,20 +150,23 @@ $func: brp.loop.imp .L_rest,.L_rest_ctop };; // load A-H +.Lpic_point: { .mmi; $LDW A=[r8],4*$SZ $LDW B=[r9],4*$SZ - mov sgm0=$sigma0[2] } + mov Ktbl=ip } { .mmi; $LDW C=[r10],4*$SZ $LDW D=[r11],4*$SZ - mov sgm1=$sigma1[2] };; + mov sgm0=$sigma0[2] };; { .mmi; $LDW E=[r8] - $LDW F=[r9] } + $LDW F=[r9] + add Ktbl=($TABLE#-.Lpic_point),Ktbl } { .mmi; $LDW G=[r10] $LDW H=[r11] cmp.ne p15,p14=0,r35 };; // used in sha256_block .L_outer: -{ .mii; mov ar.lc=15 +{ .mii; mov sgm1=$sigma1[2] + mov ar.lc=15 mov 
ar.ec=1 };; .align 32 .L_first16: @@ -329,7 +331,7 @@ $code.=<<___; (p6) add Ktbl=-$SZ*$rounds,Ktbl } { .mmi; $LDW r38=[r10],-4*$SZ $LDW r39=[r11],-4*$SZ -(p7) mov ar.lc=r3 };; +(p7) mov ar.lc=lcsave };; { .mmi; add A=A,r32 add B=B,r33 add C=C,r34 }