Switch 64-bit sparcv9 platforms from bn(64,64) to bn(64,32). This doesn't

have a negative impact on performance, because the number of multiplications does not
increase with this switch, not on sparcv9 that is. On the contrary, it
actually improves performance, because it spares a load of instructions
used to chase carries. Not to mention that BN assembler modules can be
shared more freely between 32- and 64-bit builds.
This commit is contained in:
Andy Polyakov 2005-12-15 22:40:58 +00:00
parent 877e8e970c
commit 6df8c74d5b
3 changed files with 72 additions and 101 deletions

View File

@ -202,7 +202,7 @@ my %table=(
"solaris-sparcv8-gcc","gcc:-mv8 -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris-sparcv8-gcc","gcc:-mv8 -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# -m32 should be safe to add as long as driver recognizes -mcpu=ultrasparc # -m32 should be safe to add as long as driver recognizes -mcpu=ultrasparc
"solaris-sparcv9-gcc","gcc:-m32 -mcpu=ultrasparc -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris-sparcv9-gcc","gcc:-m32 -mcpu=ultrasparc -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"solaris64-sparcv9-gcc","gcc:-m64 -mcpu=ultrasparc -O3 -Wall -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:solaris-shared:-fPIC:-m64 -shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris64-sparcv9-gcc","gcc:-m64 -mcpu=ultrasparc -O3 -Wall -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:solaris-shared:-fPIC:-m64 -shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
#### ####
"debug-solaris-sparcv8-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -O -g -mv8 -Wall -DB_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-solaris-sparcv8-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -O -g -mv8 -Wall -DB_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"debug-solaris-sparcv9-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -DPEDANTIC -O -g -mcpu=ultrasparc -pedantic -ansi -Wall -Wshadow -Wno-long-long -D__EXTENSIONS__ -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-solaris-sparcv9-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -DPEDANTIC -O -g -mcpu=ultrasparc -pedantic -ansi -Wall -Wshadow -Wno-long-long -D__EXTENSIONS__ -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@ -214,7 +214,7 @@ my %table=(
"solaris-sparcv7-cc","cc:-xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris-sparcv7-cc","cc:-xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"solaris-sparcv8-cc","cc:-xarch=v8 -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris-sparcv8-cc","cc:-xarch=v8 -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"solaris-sparcv9-cc","cc:-xtarget=ultra -xarch=v8plusa -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris-sparcv9-cc","cc:-xtarget=ultra -xarch=v8plusa -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"solaris64-sparcv9-cc","cc:-xtarget=ultra -xarch=v9a -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:solaris-shared:-KPIC:-xarch=v9 -G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/ccs/bin/ar rs", "solaris64-sparcv9-cc","cc:-xtarget=ultra -xarch=v9a -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:solaris-shared:-KPIC:-xarch=v9 -G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/ccs/bin/ar rs",
#### ####
"debug-solaris-sparcv8-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -xarch=v8 -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-solaris-sparcv8-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -xarch=v8 -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"debug-solaris-sparcv9-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -xtarget=ultra -xarch=v8plus -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o::::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-solaris-sparcv9-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -xtarget=ultra -xarch=v8plus -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o::::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@ -335,7 +335,7 @@ my %table=(
# -Wa,-Av8plus should do the trick no matter what. # -Wa,-Av8plus should do the trick no matter what.
"linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plusa -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plusa -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv8plus.o::::::dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# GCC 3.1 is a requirement # GCC 3.1 is a requirement
"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
#### Alpha Linux with GNU C and Compaq C setups #### Alpha Linux with GNU C and Compaq C setups
# Special notes: # Special notes:
# - linux-alpha+bwx-gcc is meant to be used from ./config only. If you # - linux-alpha+bwx-gcc is meant to be used from ./config only. If you
@ -365,7 +365,7 @@ my %table=(
# -DMD32_REG_T=int doesn't actually belong in sparc64 target, it # -DMD32_REG_T=int doesn't actually belong in sparc64 target, it
# simply *happens* to work around a compiler bug in gcc 3.3.3, # simply *happens* to work around a compiler bug in gcc 3.3.3,
# triggered by RIPEMD160 code. # triggered by RIPEMD160 code.
"BSD-sparc64", "gcc:-DB_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-sparc64", "gcc:-DB_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR::bn_asm.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"BSD-ia64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-ia64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"BSD-x86_64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-x86_64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",

8
TABLE
View File

@ -142,7 +142,7 @@ $unistd =
$thread_cflag = -pthread -D_THREAD_SAFE -D_REENTRANT $thread_cflag = -pthread -D_THREAD_SAFE -D_REENTRANT
$sys_id = $sys_id =
$lflags = $lflags =
$bn_ops = SIXTY_FOUR_BIT_LONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR $bn_ops = BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR
$cpuid_obj = $cpuid_obj =
$bn_obj = bn_asm.o sparcv9a-mont.o $bn_obj = bn_asm.o sparcv9a-mont.o
$des_obj = des_enc-sparc.o fcrypt_b.o $des_obj = des_enc-sparc.o fcrypt_b.o
@ -2923,7 +2923,7 @@ $unistd =
$thread_cflag = -D_REENTRANT $thread_cflag = -D_REENTRANT
$sys_id = ULTRASPARC $sys_id = ULTRASPARC
$lflags = -ldl $lflags = -ldl
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = $cpuid_obj =
$bn_obj = bn_asm.o sparcv9a-mont.o $bn_obj = bn_asm.o sparcv9a-mont.o
$des_obj = des_enc-sparc.o fcrypt_b.o $des_obj = des_enc-sparc.o fcrypt_b.o
@ -3625,7 +3625,7 @@ $unistd =
$thread_cflag = -D_REENTRANT $thread_cflag = -D_REENTRANT
$sys_id = ULTRASPARC $sys_id = ULTRASPARC
$lflags = -lsocket -lnsl -ldl $lflags = -lsocket -lnsl -ldl
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = $cpuid_obj =
$bn_obj = bn_asm.o sparcv9a-mont.o $bn_obj = bn_asm.o sparcv9a-mont.o
$des_obj = des_enc-sparc.o fcrypt_b.o $des_obj = des_enc-sparc.o fcrypt_b.o
@ -3652,7 +3652,7 @@ $unistd =
$thread_cflag = -D_REENTRANT $thread_cflag = -D_REENTRANT
$sys_id = ULTRASPARC $sys_id = ULTRASPARC
$lflags = -lsocket -lnsl -ldl $lflags = -lsocket -lnsl -ldl
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = $cpuid_obj =
$bn_obj = bn_asm.o sparcv9a-mont.o $bn_obj = bn_asm.o sparcv9a-mont.o
$des_obj = des_enc-sparc.o fcrypt_b.o $des_obj = des_enc-sparc.o fcrypt_b.o

View File

@ -138,11 +138,7 @@ $fname:
save %sp,-$frame-$locals,%sp save %sp,-$frame-$locals,%sp
sethi %hi(0xffff),$mask sethi %hi(0xffff),$mask
or $mask,%lo(0xffff),$mask or $mask,%lo(0xffff),$mask
___
$code.=<<___ if ($bits==64);
ldx [%i4],$n0 ! $n0 reassigned, remember?
___
$code.=<<___ if ($bits==32);
cmp $num,4 cmp $num,4
bl,a,pn %icc,.Lret bl,a,pn %icc,.Lret
clr %i0 clr %i0
@ -160,8 +156,7 @@ $code.=<<___ if ($bits==32);
ld [%i4+4],%o0 ld [%i4+4],%o0
sllx %o0,32,%o0 sllx %o0,32,%o0
or %o0,$n0,$n0 ! $n0=n0[1].n0[0] or %o0,$n0,$n0 ! $n0=n0[1].n0[0]
___
$code.=<<___;
sll $num,3,$num ! num*=8 sll $num,3,$num ! num*=8
add %sp,$bias,%o0 ! real top of stack add %sp,$bias,%o0 ! real top of stack
@ -188,48 +183,44 @@ $code.=<<___;
stx %o7,[%sp+$bias+$frame+48] ! save %asi stx %o7,[%sp+$bias+$frame+48] ! save %asi
sub %g0,$num,$i sub %g0,$num,$i ! i=-num
sub %g0,$num,$j sub %g0,$num,$j ! j=-num
add $ap,$j,%o3 add $ap,$j,%o3
add $bp,$i,%o4 add $bp,$i,%o4
___
$code.=<<___ if ($bits==64);
ldx [$bp+$i],%o0 ! bp[0] ldx [$bp+$i],%o0 ! bp[0]
ldx [$ap+$j],%o1 ! ap[0] ldx [$ap+$j],%o1 ! ap[0]
___ sllx %o0,32,%g1
$code.=<<___ if ($bits==32); sllx %o1,32,%g5
ldd [$bp+$i],%o0 ! bp[0] srlx %o0,32,%o0
ldd [$ap+$j],%g2 ! ap[0] srlx %o1,32,%o1
sllx %o1,32,%o1 or %g1,%o0,%o0
sllx %g3,32,%g3 or %g5,%o1,%o1
or %o0,%o1,%o0
or %g2,%g3,%o1
___
$code.=<<___;
add $np,$j,%o5 add $np,$j,%o5
mulx %o1,%o0,%o0 ! ap[0]*bp[0] mulx %o1,%o0,%o0 ! ap[0]*bp[0]
mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0 mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0
stx %o0,[%sp+$bias+$frame+0] stx %o0,[%sp+$bias+$frame+0]
ld [%o3+`$bits==32 ? 0 : 4`],$alo_ ! load a[j] as pair of 32-bit words ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words
fzeros $alo fzeros $alo
ld [%o3+`$bits==32 ? 4 : 0`],$ahi_ ld [%o3+4],$ahi_
fzeros $ahi fzeros $ahi
ld [%o5+`$bits==32 ? 0 : 4`],$nlo_ ! load n[j] as pair of 32-bit words ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
fzeros $nlo fzeros $nlo
ld [%o5+`$bits==32 ? 4 : 0`],$nhi_ ld [%o5+4],$nhi_
fzeros $nhi fzeros $nhi
! transfer b[i] to FPU as 4x16-bit values ! transfer b[i] to FPU as 4x16-bit values
ldda [%o4+`$bits==32 ? 2 : 6`]%asi,$ba ldda [%o4+2]%asi,$ba
fxtod $alo,$alo fxtod $alo,$alo
ldda [%o4+`$bits==32 ? 0 : 4`]%asi,$bb ldda [%o4+0]%asi,$bb
fxtod $ahi,$ahi fxtod $ahi,$ahi
ldda [%o4+`$bits==32 ? 6 : 2`]%asi,$bc ldda [%o4+6]%asi,$bc
fxtod $nlo,$nlo fxtod $nlo,$nlo
ldda [%o4+`$bits==32 ? 4 : 0`]%asi,$bd ldda [%o4+4]%asi,$bd
fxtod $nhi,$nhi fxtod $nhi,$nhi
! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values ! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
@ -256,24 +247,24 @@ $code.=<<___;
fmuld $alo,$bb,$alob fmuld $alo,$bb,$alob
fmuld $nlo,$nb,$nlob fmuld $nlo,$nb,$nlob
fmuld $alo,$bc,$aloc fmuld $alo,$bc,$aloc
fmuld $nlo,$nc,$nloc
faddd $aloa,$nloa,$nloa faddd $aloa,$nloa,$nloa
fmuld $nlo,$nc,$nloc
fmuld $alo,$bd,$alod fmuld $alo,$bd,$alod
fmuld $nlo,$nd,$nlod
faddd $alob,$nlob,$nlob faddd $alob,$nlob,$nlob
fmuld $nlo,$nd,$nlod
fmuld $ahi,$ba,$ahia fmuld $ahi,$ba,$ahia
fmuld $nhi,$na,$nhia
faddd $aloc,$nloc,$nloc faddd $aloc,$nloc,$nloc
fmuld $nhi,$na,$nhia
fmuld $ahi,$bb,$ahib fmuld $ahi,$bb,$ahib
fmuld $nhi,$nb,$nhib
faddd $alod,$nlod,$nlod faddd $alod,$nlod,$nlod
fmuld $nhi,$nb,$nhib
fmuld $ahi,$bc,$ahic fmuld $ahi,$bc,$ahic
fmuld $nhi,$nc,$nhic
faddd $ahia,$nhia,$nhia faddd $ahia,$nhia,$nhia
fmuld $nhi,$nc,$nhic
fmuld $ahi,$bd,$ahid fmuld $ahi,$bd,$ahid
faddd $ahib,$nhib,$nhib
fmuld $nhi,$nd,$nhid fmuld $nhi,$nd,$nhid
faddd $ahib,$nhib,$nhib
faddd $ahic,$nhic,$dota ! $nhic faddd $ahic,$nhic,$dota ! $nhic
faddd $ahid,$nhid,$dotb ! $nhid faddd $ahid,$nhid,$dotb ! $nhid
@ -317,13 +308,13 @@ $code.=<<___;
.L1st: .L1st:
add $ap,$j,%o3 add $ap,$j,%o3
add $np,$j,%o4 add $np,$j,%o4
ld [%o3+`$bits==32 ? 0 : 4`],$alo_ ! load a[j] as pair of 32-bit words ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words
fzeros $alo fzeros $alo
ld [%o3+`$bits==32 ? 4 : 0`],$ahi_ ld [%o3+4],$ahi_
fzeros $ahi fzeros $ahi
ld [%o4+`$bits==32 ? 0 : 4`],$nlo_ ! load n[j] as pair of 32-bit words ld [%o4+0],$nlo_ ! load n[j] as pair of 32-bit words
fzeros $nlo fzeros $nlo
ld [%o4+`$bits==32 ? 4 : 0`],$nhi_ ld [%o4+4],$nhi_
fzeros $nhi fzeros $nhi
fxtod $alo,$alo fxtod $alo,$alo
@ -340,23 +331,23 @@ $code.=<<___;
std $nhi,[$np_h+$j] std $nhi,[$np_h+$j]
fmuld $nlo,$nb,$nlob fmuld $nlo,$nb,$nlob
fmuld $alo,$bc,$aloc fmuld $alo,$bc,$aloc
fmuld $nlo,$nc,$nloc
faddd $aloa,$nloa,$nloa faddd $aloa,$nloa,$nloa
fmuld $nlo,$nc,$nloc
fmuld $alo,$bd,$alod fmuld $alo,$bd,$alod
fmuld $nlo,$nd,$nlod
faddd $alob,$nlob,$nlob faddd $alob,$nlob,$nlob
fmuld $nlo,$nd,$nlod
fmuld $ahi,$ba,$ahia fmuld $ahi,$ba,$ahia
fmuld $nhi,$na,$nhia
faddd $aloc,$nloc,$nloc faddd $aloc,$nloc,$nloc
fmuld $nhi,$na,$nhia
fmuld $ahi,$bb,$ahib fmuld $ahi,$bb,$ahib
fmuld $nhi,$nb,$nhib
faddd $alod,$nlod,$nlod faddd $alod,$nlod,$nlod
fmuld $nhi,$nb,$nhib
fmuld $ahi,$bc,$ahic fmuld $ahi,$bc,$ahic
fmuld $nhi,$nc,$nhic
faddd $ahia,$nhia,$nhia faddd $ahia,$nhia,$nhia
fmuld $nhi,$nc,$nhic
fmuld $ahi,$bd,$ahid fmuld $ahi,$bd,$ahid
fmuld $nhi,$nd,$nhid
faddd $ahib,$nhib,$nhib faddd $ahib,$nhib,$nhib
fmuld $nhi,$nd,$nhid
faddd $dota,$nloa,$nloa faddd $dota,$nloa,$nloa
faddd $dotb,$nlob,$nlob faddd $dotb,$nlob,$nlob
@ -429,36 +420,31 @@ $code.=<<___;
add $i,8,$i add $i,8,$i
.align 32 .align 32
.Louter: .Louter:
sub %g0,$num,$j sub %g0,$num,$j ! j=-num
add %sp,$bias+$frame+$locals,$tp add %sp,$bias+$frame+$locals,$tp
add $bp,$i,%o4 add $bp,$i,%o4
___
$code.=<<___ if ($bits==64);
ldx [$bp+$i],%o0 ! bp[i] ldx [$bp+$i],%o0 ! bp[i]
ldx [$ap+$j],%o1 ! ap[0] ldx [$ap+$j],%o1 ! ap[0]
___ sllx %o0,32,%g1
$code.=<<___ if ($bits==32); sllx %o1,32,%g5
ldd [$bp+$i],%o0 ! bp[i] srlx %o0,32,%o0
ldd [$ap+$j],%g2 ! ap[0] srlx %o1,32,%o1
sllx %o1,32,%o1 or %g1,%o0,%o0
sllx %g3,32,%g3 or %g5,%o1,%o1
or %o0,%o1,%o0
or %g2,%g3,%o1
___
$code.=<<___;
ldx [$tp],%o2 ! tp[0] ldx [$tp],%o2 ! tp[0]
mulx %o1,%o0,%o0 mulx %o1,%o0,%o0
addcc %o2,%o0,%o0 addcc %o2,%o0,%o0
mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0 mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0
stx %o0,[%sp+$bias+$frame+0] stx %o0,[%sp+$bias+$frame+0]
! transfer b[i] to FPU as 4x16-bit values ! transfer b[i] to FPU as 4x16-bit values
ldda [%o4+`$bits==32 ? 2 : 6`]%asi,$ba ldda [%o4+2]%asi,$ba
ldda [%o4+`$bits==32 ? 0 : 4`]%asi,$bb ldda [%o4+0]%asi,$bb
ldda [%o4+`$bits==32 ? 6 : 2`]%asi,$bc ldda [%o4+6]%asi,$bc
ldda [%o4+`$bits==32 ? 4 : 0`]%asi,$bd ldda [%o4+4]%asi,$bd
! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values ! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
ldda [%sp+$bias+$frame+6]%asi,$na ldda [%sp+$bias+$frame+6]%asi,$na
@ -483,24 +469,24 @@ $code.=<<___;
fmuld $alo,$bb,$alob fmuld $alo,$bb,$alob
fmuld $nlo,$nb,$nlob fmuld $nlo,$nb,$nlob
fmuld $alo,$bc,$aloc fmuld $alo,$bc,$aloc
fmuld $nlo,$nc,$nloc
faddd $aloa,$nloa,$nloa faddd $aloa,$nloa,$nloa
fmuld $nlo,$nc,$nloc
fmuld $alo,$bd,$alod fmuld $alo,$bd,$alod
fmuld $nlo,$nd,$nlod
faddd $alob,$nlob,$nlob faddd $alob,$nlob,$nlob
fmuld $nlo,$nd,$nlod
fmuld $ahi,$ba,$ahia fmuld $ahi,$ba,$ahia
fmuld $nhi,$na,$nhia
faddd $aloc,$nloc,$nloc faddd $aloc,$nloc,$nloc
fmuld $nhi,$na,$nhia
fmuld $ahi,$bb,$ahib fmuld $ahi,$bb,$ahib
fmuld $nhi,$nb,$nhib
faddd $alod,$nlod,$nlod faddd $alod,$nlod,$nlod
fmuld $nhi,$nb,$nhib
fmuld $ahi,$bc,$ahic fmuld $ahi,$bc,$ahic
fmuld $nhi,$nc,$nhic
faddd $ahia,$nhia,$nhia faddd $ahia,$nhia,$nhia
fmuld $nhi,$nc,$nhic
fmuld $ahi,$bd,$ahid fmuld $ahi,$bd,$ahid
faddd $ahib,$nhib,$nhib
fmuld $nhi,$nd,$nhid fmuld $nhi,$nd,$nhid
faddd $ahib,$nhib,$nhib
faddd $ahic,$nhic,$dota ! $nhic faddd $ahic,$nhic,$dota ! $nhic
faddd $ahid,$nhid,$dotb ! $nhid faddd $ahid,$nhid,$dotb ! $nhid
@ -558,24 +544,24 @@ $code.=<<___;
fmuld $alo,$bb,$alob fmuld $alo,$bb,$alob
fmuld $nlo,$nb,$nlob fmuld $nlo,$nb,$nlob
fmuld $alo,$bc,$aloc fmuld $alo,$bc,$aloc
fmuld $nlo,$nc,$nloc
faddd $aloa,$nloa,$nloa faddd $aloa,$nloa,$nloa
fmuld $nlo,$nc,$nloc
fmuld $alo,$bd,$alod fmuld $alo,$bd,$alod
fmuld $nlo,$nd,$nlod
faddd $alob,$nlob,$nlob faddd $alob,$nlob,$nlob
fmuld $nlo,$nd,$nlod
fmuld $ahi,$ba,$ahia fmuld $ahi,$ba,$ahia
fmuld $nhi,$na,$nhia
faddd $aloc,$nloc,$nloc faddd $aloc,$nloc,$nloc
fmuld $nhi,$na,$nhia
fmuld $ahi,$bb,$ahib fmuld $ahi,$bb,$ahib
fmuld $nhi,$nb,$nhib
faddd $alod,$nlod,$nlod faddd $alod,$nlod,$nlod
fmuld $nhi,$nb,$nhib
fmuld $ahi,$bc,$ahic fmuld $ahi,$bc,$ahic
fmuld $nhi,$nc,$nhic
faddd $ahia,$nhia,$nhia faddd $ahia,$nhia,$nhia
fmuld $nhi,$nc,$nhic
fmuld $ahi,$bd,$ahid fmuld $ahi,$bd,$ahid
faddd $ahib,$nhib,$nhib
fmuld $nhi,$nd,$nhid fmuld $nhi,$nd,$nhid
faddd $ahib,$nhib,$nhib
faddd $dota,$nloa,$nloa faddd $dota,$nloa,$nloa
faddd $dotb,$nlob,$nlob faddd $dotb,$nlob,$nlob
faddd $ahic,$nhic,$dota ! $nhic faddd $ahic,$nhic,$dota ! $nhic
@ -661,7 +647,7 @@ $code.=<<___;
add $tp,8,$tp ! adjust tp to point at the end add $tp,8,$tp ! adjust tp to point at the end
ld [$tp-8],%o0 ld [$tp-8],%o0
ld [$np-`$bits==32 ? 4 : 8`],%o1 ld [$np-4],%o1
cmp %o0,%o1 ! compare topmost words cmp %o0,%o1 ! compare topmost words
bcs,pt %icc,.Lcopy ! %icc.c is clean if not taken bcs,pt %icc,.Lcopy ! %icc.c is clean if not taken
nop nop
@ -670,41 +656,26 @@ $code.=<<___;
.Lsub: .Lsub:
ldd [$tp+%o7],%o0 ldd [$tp+%o7],%o0
ldd [$np+%o7],%o2 ldd [$np+%o7],%o2
___
$code.=<<___ if ($bits==64);
subccc %o1,%o3,%o3
subccc %o0,%o2,%o2
___
$code.=<<___ if ($bits==32);
subccc %o1,%o2,%o2 subccc %o1,%o2,%o2
subccc %o0,%o3,%o3 subccc %o0,%o3,%o3
___
$code.=<<___;
std %o2,[$rp+%o7] std %o2,[$rp+%o7]
add %o7,8,%o7 add %o7,8,%o7
brnz,pt %o7,.Lsub brnz,pt %o7,.Lsub
nop nop
subccc $carry,0,$carry subccc $carry,0,$carry
bcc,pt %icc,.Lzap bcc,pt %icc,.Lzap
sub %g0,$num,%o7 sub %g0,$num,%o7 ! n=-num
.align 16,0x1000000 .align 16,0x1000000
.Lcopy: .Lcopy:
ldx [$tp+%o7],%o0 ldx [$tp+%o7],%o0
___
$code.=<<___ if ($bits==64);
stx %o0,[$rp+%o7]
___
$code.=<<___ if ($bits==32);
srlx %o0,32,%o1 srlx %o0,32,%o1
std %o0,[$rp+%o7] std %o0,[$rp+%o7]
___
$code.=<<___;
add %o7,8,%o7 add %o7,8,%o7
brnz,pt %o7,.Lcopy brnz,pt %o7,.Lcopy
nop nop
ba .Lzap ba .Lzap
sub %g0,$num,%o7 sub %g0,$num,%o7 ! n=-num
.align 32 .align 32
.Lzap: .Lzap: