diff --git a/Configure b/Configure index f8f2e9f0f..aa818ac05 100755 --- a/Configure +++ b/Configure @@ -202,7 +202,7 @@ my %table=( "solaris-sparcv8-gcc","gcc:-mv8 -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # -m32 should be safe to add as long as driver recognizes -mcpu=ultrasparc "solaris-sparcv9-gcc","gcc:-m32 -mcpu=ultrasparc -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"solaris64-sparcv9-gcc","gcc:-m64 -mcpu=ultrasparc -O3 -Wall -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:solaris-shared:-fPIC:-m64 -shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"solaris64-sparcv9-gcc","gcc:-m64 -mcpu=ultrasparc -O3 -Wall -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:solaris-shared:-fPIC:-m64 -shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### "debug-solaris-sparcv8-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -O -g -mv8 -Wall -DB_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-solaris-sparcv9-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -DPEDANTIC -O -g -mcpu=ultrasparc -pedantic -ansi -Wall -Wshadow -Wno-long-long -D__EXTENSIONS__ -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -214,7 +214,7 @@ my %table=( "solaris-sparcv7-cc","cc:-xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris-sparcv8-cc","cc:-xarch=v8 -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris-sparcv9-cc","cc:-xtarget=ultra -xarch=v8plusa -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"solaris64-sparcv9-cc","cc:-xtarget=ultra -xarch=v9a -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:solaris-shared:-KPIC:-xarch=v9 -G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/ccs/bin/ar rs", +"solaris64-sparcv9-cc","cc:-xtarget=ultra -xarch=v9a -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:solaris-shared:-KPIC:-xarch=v9 -G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/ccs/bin/ar rs", #### "debug-solaris-sparcv8-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -xarch=v8 -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-solaris-sparcv9-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -xtarget=ultra -xarch=v8plus -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o::::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -335,7 +335,7 @@ my %table=( # -Wa,-Av8plus should do the trick no matter what. "linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plusa -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # GCC 3.1 is a requirement -"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### Alpha Linux with GNU C and Compaq C setups # Special notes: # - linux-alpha+bwx-gcc is ment to be used from ./config only. If you @@ -365,7 +365,7 @@ my %table=( # -DMD32_REG_T=int doesn't actually belong in sparc64 target, it # simply *happens* to work around a compiler bug in gcc 3.3.3, # triggered by RIPEMD160 code. -"BSD-sparc64", "gcc:-DB_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"BSD-sparc64", "gcc:-DB_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-ia64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-x86_64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", diff --git a/TABLE b/TABLE index 1c59c9ce5..f2acd1472 100644 --- a/TABLE +++ b/TABLE @@ -142,7 +142,7 @@ $unistd = $thread_cflag = -pthread -D_THREAD_SAFE -D_REENTRANT $sys_id = $lflags = -$bn_ops = SIXTY_FOUR_BIT_LONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR +$bn_ops = BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR $cpuid_obj = $bn_obj = bn_asm.o sparcv9a-mont.o $des_obj = des_enc-sparc.o fcrypt_b.o @@ -2923,7 +2923,7 @@ $unistd = $thread_cflag = -D_REENTRANT $sys_id = ULTRASPARC $lflags = -ldl -$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR +$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR $cpuid_obj = $bn_obj = bn_asm.o sparcv9a-mont.o $des_obj = des_enc-sparc.o fcrypt_b.o @@ -3625,7 +3625,7 @@ $unistd = $thread_cflag = -D_REENTRANT $sys_id = ULTRASPARC $lflags = -lsocket -lnsl -ldl -$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR +$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR $cpuid_obj = $bn_obj = bn_asm.o sparcv9a-mont.o $des_obj = des_enc-sparc.o fcrypt_b.o @@ -3652,7 +3652,7 @@ $unistd = $thread_cflag = -D_REENTRANT $sys_id = ULTRASPARC $lflags = -lsocket -lnsl -ldl -$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR +$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR $cpuid_obj = $bn_obj = bn_asm.o sparcv9a-mont.o $des_obj = des_enc-sparc.o fcrypt_b.o diff --git a/crypto/bn/asm/sparcv9a-mont.pl b/crypto/bn/asm/sparcv9a-mont.pl index 87f6ac1e9..81d7ef608 100755 --- a/crypto/bn/asm/sparcv9a-mont.pl +++ b/crypto/bn/asm/sparcv9a-mont.pl @@ -138,11 +138,7 @@ $fname: save %sp,-$frame-$locals,%sp sethi %hi(0xffff),$mask or $mask,%lo(0xffff),$mask -___ -$code.=<<___ if ($bits==64); - ldx [%i4],$n0 ! $n0 reassigned, remember? -___ -$code.=<<___ if ($bits==32); + cmp $num,4 bl,a,pn %icc,.Lret clr %i0 @@ -160,8 +156,7 @@ $code.=<<___ if ($bits==32); ld [%i4+4],%o0 sllx %o0,32,%o0 or %o0,$n0,$n0 ! $n0=n0[1].n0[0] -___ -$code.=<<___; + sll $num,3,$num ! num*=8 add %sp,$bias,%o0 ! real top of stack @@ -188,48 +183,44 @@ $code.=<<___; stx %o7,[%sp+$bias+$frame+48] ! save %asi - sub %g0,$num,$i - sub %g0,$num,$j + sub %g0,$num,$i ! i=-num + sub %g0,$num,$j ! j=-num add $ap,$j,%o3 add $bp,$i,%o4 -___ -$code.=<<___ if ($bits==64); + ldx [$bp+$i],%o0 ! bp[0] ldx [$ap+$j],%o1 ! ap[0] -___ -$code.=<<___ if ($bits==32); - ldd [$bp+$i],%o0 ! bp[0] - ldd [$ap+$j],%g2 ! ap[0] - sllx %o1,32,%o1 - sllx %g3,32,%g3 - or %o0,%o1,%o0 - or %g2,%g3,%o1 -___ -$code.=<<___; + sllx %o0,32,%g1 + sllx %o1,32,%g5 + srlx %o0,32,%o0 + srlx %o1,32,%o1 + or %g1,%o0,%o0 + or %g5,%o1,%o1 + add $np,$j,%o5 mulx %o1,%o0,%o0 ! ap[0]*bp[0] mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0 stx %o0,[%sp+$bias+$frame+0] - ld [%o3+`$bits==32 ? 0 : 4`],$alo_ ! load a[j] as pair of 32-bit words + ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words fzeros $alo - ld [%o3+`$bits==32 ? 4 : 0`],$ahi_ + ld [%o3+4],$ahi_ fzeros $ahi - ld [%o5+`$bits==32 ? 0 : 4`],$nlo_ ! load n[j] as pair of 32-bit words + ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words fzeros $nlo - ld [%o5+`$bits==32 ? 4 : 0`],$nhi_ + ld [%o5+4],$nhi_ fzeros $nhi ! transfer b[i] to FPU as 4x16-bit values - ldda [%o4+`$bits==32 ? 2 : 6`]%asi,$ba + ldda [%o4+2]%asi,$ba fxtod $alo,$alo - ldda [%o4+`$bits==32 ? 0 : 4`]%asi,$bb + ldda [%o4+0]%asi,$bb fxtod $ahi,$ahi - ldda [%o4+`$bits==32 ? 6 : 2`]%asi,$bc + ldda [%o4+6]%asi,$bc fxtod $nlo,$nlo - ldda [%o4+`$bits==32 ? 4 : 0`]%asi,$bd + ldda [%o4+4]%asi,$bd fxtod $nhi,$nhi ! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values @@ -256,24 +247,24 @@ $code.=<<___; fmuld $alo,$bb,$alob fmuld $nlo,$nb,$nlob fmuld $alo,$bc,$aloc - fmuld $nlo,$nc,$nloc faddd $aloa,$nloa,$nloa + fmuld $nlo,$nc,$nloc fmuld $alo,$bd,$alod - fmuld $nlo,$nd,$nlod faddd $alob,$nlob,$nlob + fmuld $nlo,$nd,$nlod fmuld $ahi,$ba,$ahia - fmuld $nhi,$na,$nhia faddd $aloc,$nloc,$nloc + fmuld $nhi,$na,$nhia fmuld $ahi,$bb,$ahib - fmuld $nhi,$nb,$nhib faddd $alod,$nlod,$nlod + fmuld $nhi,$nb,$nhib fmuld $ahi,$bc,$ahic - fmuld $nhi,$nc,$nhic faddd $ahia,$nhia,$nhia + fmuld $nhi,$nc,$nhic fmuld $ahi,$bd,$ahid + faddd $ahib,$nhib,$nhib fmuld $nhi,$nd,$nhid - faddd $ahib,$nhib,$nhib faddd $ahic,$nhic,$dota ! $nhic faddd $ahid,$nhid,$dotb ! $nhid @@ -317,13 +308,13 @@ $code.=<<___; .L1st: add $ap,$j,%o3 add $np,$j,%o4 - ld [%o3+`$bits==32 ? 0 : 4`],$alo_ ! load a[j] as pair of 32-bit words + ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words fzeros $alo - ld [%o3+`$bits==32 ? 4 : 0`],$ahi_ + ld [%o3+4],$ahi_ fzeros $ahi - ld [%o4+`$bits==32 ? 0 : 4`],$nlo_ ! load n[j] as pair of 32-bit words + ld [%o4+0],$nlo_ ! load n[j] as pair of 32-bit words fzeros $nlo - ld [%o4+`$bits==32 ? 4 : 0`],$nhi_ + ld [%o4+4],$nhi_ fzeros $nhi fxtod $alo,$alo @@ -340,23 +331,23 @@ $code.=<<___; std $nhi,[$np_h+$j] fmuld $nlo,$nb,$nlob fmuld $alo,$bc,$aloc - fmuld $nlo,$nc,$nloc faddd $aloa,$nloa,$nloa + fmuld $nlo,$nc,$nloc fmuld $alo,$bd,$alod - fmuld $nlo,$nd,$nlod faddd $alob,$nlob,$nlob + fmuld $nlo,$nd,$nlod fmuld $ahi,$ba,$ahia - fmuld $nhi,$na,$nhia faddd $aloc,$nloc,$nloc + fmuld $nhi,$na,$nhia fmuld $ahi,$bb,$ahib - fmuld $nhi,$nb,$nhib faddd $alod,$nlod,$nlod + fmuld $nhi,$nb,$nhib fmuld $ahi,$bc,$ahic - fmuld $nhi,$nc,$nhic faddd $ahia,$nhia,$nhia + fmuld $nhi,$nc,$nhic fmuld $ahi,$bd,$ahid - fmuld $nhi,$nd,$nhid faddd $ahib,$nhib,$nhib + fmuld $nhi,$nd,$nhid faddd $dota,$nloa,$nloa faddd $dotb,$nlob,$nlob @@ -429,36 +420,31 @@ $code.=<<___; add $i,8,$i .align 32 .Louter: - sub %g0,$num,$j + sub %g0,$num,$j ! j=-num add %sp,$bias+$frame+$locals,$tp add $bp,$i,%o4 -___ -$code.=<<___ if ($bits==64); + ldx [$bp+$i],%o0 ! bp[i] ldx [$ap+$j],%o1 ! ap[0] -___ -$code.=<<___ if ($bits==32); - ldd [$bp+$i],%o0 ! bp[i] - ldd [$ap+$j],%g2 ! ap[0] - sllx %o1,32,%o1 - sllx %g3,32,%g3 - or %o0,%o1,%o0 - or %g2,%g3,%o1 -___ -$code.=<<___; + sllx %o0,32,%g1 + sllx %o1,32,%g5 + srlx %o0,32,%o0 + srlx %o1,32,%o1 + or %g1,%o0,%o0 + or %g5,%o1,%o1 + ldx [$tp],%o2 ! tp[0] mulx %o1,%o0,%o0 addcc %o2,%o0,%o0 mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0 stx %o0,[%sp+$bias+$frame+0] - ! transfer b[i] to FPU as 4x16-bit values - ldda [%o4+`$bits==32 ? 2 : 6`]%asi,$ba - ldda [%o4+`$bits==32 ? 0 : 4`]%asi,$bb - ldda [%o4+`$bits==32 ? 6 : 2`]%asi,$bc - ldda [%o4+`$bits==32 ? 4 : 0`]%asi,$bd + ldda [%o4+2]%asi,$ba + ldda [%o4+0]%asi,$bb + ldda [%o4+6]%asi,$bc + ldda [%o4+4]%asi,$bd ! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values ldda [%sp+$bias+$frame+6]%asi,$na @@ -483,24 +469,24 @@ $code.=<<___; fmuld $alo,$bb,$alob fmuld $nlo,$nb,$nlob fmuld $alo,$bc,$aloc - fmuld $nlo,$nc,$nloc faddd $aloa,$nloa,$nloa + fmuld $nlo,$nc,$nloc fmuld $alo,$bd,$alod - fmuld $nlo,$nd,$nlod faddd $alob,$nlob,$nlob + fmuld $nlo,$nd,$nlod fmuld $ahi,$ba,$ahia - fmuld $nhi,$na,$nhia faddd $aloc,$nloc,$nloc + fmuld $nhi,$na,$nhia fmuld $ahi,$bb,$ahib - fmuld $nhi,$nb,$nhib faddd $alod,$nlod,$nlod + fmuld $nhi,$nb,$nhib fmuld $ahi,$bc,$ahic - fmuld $nhi,$nc,$nhic faddd $ahia,$nhia,$nhia + fmuld $nhi,$nc,$nhic fmuld $ahi,$bd,$ahid + faddd $ahib,$nhib,$nhib fmuld $nhi,$nd,$nhid - faddd $ahib,$nhib,$nhib faddd $ahic,$nhic,$dota ! $nhic faddd $ahid,$nhid,$dotb ! $nhid @@ -558,24 +544,24 @@ $code.=<<___; fmuld $alo,$bb,$alob fmuld $nlo,$nb,$nlob fmuld $alo,$bc,$aloc - fmuld $nlo,$nc,$nloc faddd $aloa,$nloa,$nloa + fmuld $nlo,$nc,$nloc fmuld $alo,$bd,$alod - fmuld $nlo,$nd,$nlod faddd $alob,$nlob,$nlob + fmuld $nlo,$nd,$nlod fmuld $ahi,$ba,$ahia - fmuld $nhi,$na,$nhia faddd $aloc,$nloc,$nloc + fmuld $nhi,$na,$nhia fmuld $ahi,$bb,$ahib - fmuld $nhi,$nb,$nhib faddd $alod,$nlod,$nlod + fmuld $nhi,$nb,$nhib fmuld $ahi,$bc,$ahic - fmuld $nhi,$nc,$nhic faddd $ahia,$nhia,$nhia + fmuld $nhi,$nc,$nhic fmuld $ahi,$bd,$ahid + faddd $ahib,$nhib,$nhib fmuld $nhi,$nd,$nhid - faddd $ahib,$nhib,$nhib faddd $dota,$nloa,$nloa faddd $dotb,$nlob,$nlob faddd $ahic,$nhic,$dota ! $nhic @@ -661,7 +647,7 @@ $code.=<<___; add $tp,8,$tp ! adjust tp to point at the end ld [$tp-8],%o0 - ld [$np-`$bits==32 ? 4 : 8`],%o1 + ld [$np-4],%o1 cmp %o0,%o1 ! compare topmost words bcs,pt %icc,.Lcopy ! %icc.c is clean if not taken nop @@ -670,41 +656,26 @@ $code.=<<___; .Lsub: ldd [$tp+%o7],%o0 ldd [$np+%o7],%o2 -___ -$code.=<<___ if ($bits==64); - subccc %o1,%o3,%o3 - subccc %o0,%o2,%o2 -___ -$code.=<<___ if ($bits==32); subccc %o1,%o2,%o2 subccc %o0,%o3,%o3 -___ -$code.=<<___; std %o2,[$rp+%o7] add %o7,8,%o7 brnz,pt %o7,.Lsub nop subccc $carry,0,$carry bcc,pt %icc,.Lzap - sub %g0,$num,%o7 + sub %g0,$num,%o7 ! n=-num .align 16,0x1000000 .Lcopy: ldx [$tp+%o7],%o0 -___ -$code.=<<___ if ($bits==64); - stx %o0,[$rp+%o7] -___ -$code.=<<___ if ($bits==32); srlx %o0,32,%o1 std %o0,[$rp+%o7] -___ -$code.=<<___; add %o7,8,%o7 brnz,pt %o7,.Lcopy nop ba .Lzap - sub %g0,$num,%o7 + sub %g0,$num,%o7 ! n=-num .align 32 .Lzap: