Synchronize message digests in 098-fips with 098.
This commit is contained in:
18
Configure
18
Configure
@@ -201,11 +201,11 @@ my %table=(
|
|||||||
"solaris-sparcv7-gcc","gcc:-O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"solaris-sparcv7-gcc","gcc:-O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"solaris-sparcv8-gcc","gcc:-mv8 -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"solaris-sparcv8-gcc","gcc:-mv8 -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
# -m32 should be safe to add as long as driver recognizes -mcpu=ultrasparc
|
# -m32 should be safe to add as long as driver recognizes -mcpu=ultrasparc
|
||||||
"solaris-sparcv9-gcc","gcc:-m32 -mcpu=ultrasparc -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"solaris-sparcv9-gcc","gcc:-m32 -mcpu=ultrasparc -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"solaris64-sparcv9-gcc","gcc:-m64 -mcpu=ultrasparc -O3 -Wall -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:::des_enc-sparc.o fcrypt_b.o:::md5-sparcv9.o::::::dlfcn:solaris-shared:-fPIC:-m64 -shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"solaris64-sparcv9-gcc","gcc:-m64 -mcpu=ultrasparc -O3 -Wall -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:::des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-fPIC:-m64 -shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
####
|
####
|
||||||
"debug-solaris-sparcv8-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG_ALL -O -g -mv8 -Wall -DB_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"debug-solaris-sparcv8-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG_ALL -O -g -mv8 -Wall -DB_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"debug-solaris-sparcv9-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG_ALL -DPEDANTIC -O -g -mcpu=ultrasparc -pedantic -ansi -Wall -Wshadow -Wno-long-long -D__EXTENSIONS__ -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"debug-solaris-sparcv9-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG_ALL -DPEDANTIC -O -g -mcpu=ultrasparc -pedantic -ansi -Wall -Wshadow -Wno-long-long -D__EXTENSIONS__ -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
|
|
||||||
#### SPARC Solaris with Sun C setups
|
#### SPARC Solaris with Sun C setups
|
||||||
# SC4.0 doesn't pass 'make test', upgrade to SC5.0 or SC4.2.
|
# SC4.0 doesn't pass 'make test', upgrade to SC5.0 or SC4.2.
|
||||||
@@ -213,11 +213,11 @@ my %table=(
|
|||||||
# SC5.0 note: Compiler common patch 107357-01 or later is required!
|
# SC5.0 note: Compiler common patch 107357-01 or later is required!
|
||||||
"solaris-sparcv7-cc","cc:-xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"solaris-sparcv7-cc","cc:-xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"solaris-sparcv8-cc","cc:-xarch=v8 -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"solaris-sparcv8-cc","cc:-xarch=v8 -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"solaris-sparcv9-cc","cc:-xtarget=ultra -xarch=v8plus -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"solaris-sparcv9-cc","cc:-xtarget=ultra -xarch=v8plus -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"solaris64-sparcv9-cc","cc:-xtarget=ultra -xarch=v9 -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:::des_enc-sparc.o fcrypt_b.o:::md5-sparcv9.o::::::dlfcn:solaris-shared:-KPIC:-xarch=v9 -G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/ccs/bin/ar rs",
|
"solaris64-sparcv9-cc","cc:-xtarget=ultra -xarch=v9 -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:::des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:solaris-shared:-KPIC:-xarch=v9 -G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/ccs/bin/ar rs",
|
||||||
####
|
####
|
||||||
"debug-solaris-sparcv8-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG_ALL -xarch=v8 -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"debug-solaris-sparcv8-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG_ALL -xarch=v8 -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8.o::::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"debug-solaris-sparcv9-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG_ALL -xtarget=ultra -xarch=v8plus -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o::::md5-sparcv8plus.o::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"debug-solaris-sparcv9-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG_ALL -xtarget=ultra -xarch=v8plus -g -O -xstrconst -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR::sparcv8plus.o::::::::::dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
|
|
||||||
#### SunOS configs, assuming sparc for the gcc one.
|
#### SunOS configs, assuming sparc for the gcc one.
|
||||||
#"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST::(unknown):SUNOS::DES_UNROLL:${no_asm}::",
|
#"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST::(unknown):SUNOS::DES_UNROLL:${no_asm}::",
|
||||||
@@ -332,9 +332,9 @@ my %table=(
|
|||||||
"linux-sparcv8","gcc:-mv8 -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -DBN_DIV2W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"linux-sparcv8","gcc:-mv8 -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -DBN_DIV2W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
# it's a real mess with -mcpu=ultrasparc option under Linux, but
|
# it's a real mess with -mcpu=ultrasparc option under Linux, but
|
||||||
# -Wa,-Av8plus should do the trick no matter what.
|
# -Wa,-Av8plus should do the trick no matter what.
|
||||||
"linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plus -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::md5-sparcv8plus.o::::::dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plus -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::sparcv8plus.o:des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
# GCC 3.1 is a requirement
|
# GCC 3.1 is a requirement
|
||||||
"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::::::md5-sparcv9.o::::::dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::::::::::::dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
#### Alpha Linux with GNU C and Compaq C setups
|
#### Alpha Linux with GNU C and Compaq C setups
|
||||||
# Special notes:
|
# Special notes:
|
||||||
# - linux-alpha+bwx-gcc is meant to be used from ./config only. If you
|
# - linux-alpha+bwx-gcc is meant to be used from ./config only. If you
|
||||||
@@ -364,7 +364,7 @@ my %table=(
|
|||||||
# -DMD32_REG_T=int doesn't actually belong in sparc64 target, it
|
# -DMD32_REG_T=int doesn't actually belong in sparc64 target, it
|
||||||
# simply *happens* to work around a compiler bug in gcc 3.3.3,
|
# simply *happens* to work around a compiler bug in gcc 3.3.3,
|
||||||
# triggered by RIPEMD160 code.
|
# triggered by RIPEMD160 code.
|
||||||
"BSD-sparc64", "gcc:-DB_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR:::des_enc-sparc.o fcrypt_b.o:::md5-sparcv9.o::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"BSD-sparc64", "gcc:-DB_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR:::des_enc-sparc.o fcrypt_b.o:::::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"BSD-ia64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK:${ia64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"BSD-ia64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK:${ia64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
"BSD-x86_64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
"BSD-x86_64", "gcc:-DL_ENDIAN -DTERMIOS -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||||
|
|
||||||
|
18
TABLE
18
TABLE
@@ -148,7 +148,7 @@ $bn_obj =
|
|||||||
$des_obj = des_enc-sparc.o fcrypt_b.o
|
$des_obj = des_enc-sparc.o fcrypt_b.o
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv9.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
@@ -1660,7 +1660,7 @@ $bn_obj = sparcv8plus.o
|
|||||||
$des_obj =
|
$des_obj =
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv8plus.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
@@ -1687,7 +1687,7 @@ $bn_obj = sparcv8plus.o
|
|||||||
$des_obj = des_enc-sparc.o fcrypt_b.o
|
$des_obj = des_enc-sparc.o fcrypt_b.o
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv8plus.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
@@ -2929,7 +2929,7 @@ $bn_obj = sparcv8plus.o
|
|||||||
$des_obj = des_enc-sparc.o fcrypt_b.o
|
$des_obj = des_enc-sparc.o fcrypt_b.o
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv8plus.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
@@ -2983,7 +2983,7 @@ $bn_obj =
|
|||||||
$des_obj =
|
$des_obj =
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv9.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
@@ -3577,7 +3577,7 @@ $bn_obj = sparcv8plus.o
|
|||||||
$des_obj = des_enc-sparc.o fcrypt_b.o
|
$des_obj = des_enc-sparc.o fcrypt_b.o
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv8plus.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
@@ -3604,7 +3604,7 @@ $bn_obj = sparcv8plus.o
|
|||||||
$des_obj = des_enc-sparc.o fcrypt_b.o
|
$des_obj = des_enc-sparc.o fcrypt_b.o
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv8plus.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
@@ -3685,7 +3685,7 @@ $bn_obj =
|
|||||||
$des_obj = des_enc-sparc.o fcrypt_b.o
|
$des_obj = des_enc-sparc.o fcrypt_b.o
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv9.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
@@ -3712,7 +3712,7 @@ $bn_obj =
|
|||||||
$des_obj = des_enc-sparc.o fcrypt_b.o
|
$des_obj = des_enc-sparc.o fcrypt_b.o
|
||||||
$aes_obj =
|
$aes_obj =
|
||||||
$bf_obj =
|
$bf_obj =
|
||||||
$md5_obj = md5-sparcv9.o
|
$md5_obj =
|
||||||
$sha1_obj =
|
$sha1_obj =
|
||||||
$cast_obj =
|
$cast_obj =
|
||||||
$rc4_obj =
|
$rc4_obj =
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
/* crypto/md32_common.h */
|
/* crypto/md32_common.h */
|
||||||
/* ====================================================================
|
/* ====================================================================
|
||||||
* Copyright (c) 1999-2002 The OpenSSL Project. All rights reserved.
|
* Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
@@ -47,10 +47,6 @@
|
|||||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
* ====================================================================
|
* ====================================================================
|
||||||
*
|
*
|
||||||
* This product includes cryptographic software written by Eric Young
|
|
||||||
* (eay@cryptsoft.com). This product includes software written by Tim
|
|
||||||
* Hudson (tjh@cryptsoft.com).
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -76,40 +72,27 @@
|
|||||||
* typedef struct {
|
* typedef struct {
|
||||||
* ...
|
* ...
|
||||||
* HASH_LONG Nl,Nh;
|
* HASH_LONG Nl,Nh;
|
||||||
|
* either {
|
||||||
* HASH_LONG data[HASH_LBLOCK];
|
* HASH_LONG data[HASH_LBLOCK];
|
||||||
|
* unsigned char data[HASH_CBLOCK];
|
||||||
|
* };
|
||||||
* unsigned int num;
|
* unsigned int num;
|
||||||
* ...
|
* ...
|
||||||
* } HASH_CTX;
|
* } HASH_CTX;
|
||||||
|
* data[] vector is expected to be zeroed upon first call to
|
||||||
|
* HASH_UPDATE.
|
||||||
* HASH_UPDATE
|
* HASH_UPDATE
|
||||||
* name of "Update" function, implemented here.
|
* name of "Update" function, implemented here.
|
||||||
* HASH_TRANSFORM
|
* HASH_TRANSFORM
|
||||||
* name of "Transform" function, implemented here.
|
* name of "Transform" function, implemented here.
|
||||||
* HASH_FINAL
|
* HASH_FINAL
|
||||||
* name of "Final" function, implemented here.
|
* name of "Final" function, implemented here.
|
||||||
* HASH_BLOCK_HOST_ORDER
|
|
||||||
* name of "block" function treating *aligned* input message
|
|
||||||
* in host byte order, implemented externally.
|
|
||||||
* HASH_BLOCK_DATA_ORDER
|
* HASH_BLOCK_DATA_ORDER
|
||||||
* name of "block" function treating *unaligned* input message
|
* name of "block" function capable of treating *unaligned* input
|
||||||
* in original (data) byte order, implemented externally (it
|
* message in original (data) byte order, implemented externally.
|
||||||
* actually is optional if data and host are of the same
|
|
||||||
* "endianess").
|
|
||||||
* HASH_MAKE_STRING
|
* HASH_MAKE_STRING
|
||||||
* macro converting context variables to an ASCII hash string.
|
* macro converting context variables to an ASCII hash string.
|
||||||
*
|
*
|
||||||
* Optional macros:
|
|
||||||
*
|
|
||||||
* B_ENDIAN or L_ENDIAN
|
|
||||||
* defines host byte-order.
|
|
||||||
* HASH_LONG_LOG2
|
|
||||||
* defaults to 2 if not stated otherwise.
|
|
||||||
* HASH_LBLOCK
|
|
||||||
* assumed to be HASH_CBLOCK/4 if not stated otherwise.
|
|
||||||
* HASH_BLOCK_DATA_ORDER_ALIGNED
|
|
||||||
* alternative "block" function capable of treating
|
|
||||||
* aligned input message in original (data) order,
|
|
||||||
* implemented externally.
|
|
||||||
*
|
|
||||||
* MD5 example:
|
* MD5 example:
|
||||||
*
|
*
|
||||||
* #define DATA_ORDER_IS_LITTLE_ENDIAN
|
* #define DATA_ORDER_IS_LITTLE_ENDIAN
|
||||||
@@ -118,11 +101,9 @@
|
|||||||
* #define HASH_LONG_LOG2 MD5_LONG_LOG2
|
* #define HASH_LONG_LOG2 MD5_LONG_LOG2
|
||||||
* #define HASH_CTX MD5_CTX
|
* #define HASH_CTX MD5_CTX
|
||||||
* #define HASH_CBLOCK MD5_CBLOCK
|
* #define HASH_CBLOCK MD5_CBLOCK
|
||||||
* #define HASH_LBLOCK MD5_LBLOCK
|
|
||||||
* #define HASH_UPDATE MD5_Update
|
* #define HASH_UPDATE MD5_Update
|
||||||
* #define HASH_TRANSFORM MD5_Transform
|
* #define HASH_TRANSFORM MD5_Transform
|
||||||
* #define HASH_FINAL MD5_Final
|
* #define HASH_FINAL MD5_Final
|
||||||
* #define HASH_BLOCK_HOST_ORDER md5_block_host_order
|
|
||||||
* #define HASH_BLOCK_DATA_ORDER md5_block_data_order
|
* #define HASH_BLOCK_DATA_ORDER md5_block_data_order
|
||||||
*
|
*
|
||||||
* <appro@fy.chalmers.se>
|
* <appro@fy.chalmers.se>
|
||||||
@@ -152,27 +133,9 @@
|
|||||||
#error "HASH_FINAL must be defined!"
|
#error "HASH_FINAL must be defined!"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef HASH_BLOCK_HOST_ORDER
|
|
||||||
#error "HASH_BLOCK_HOST_ORDER must be defined!"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/*
|
|
||||||
* Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED
|
|
||||||
* isn't defined.
|
|
||||||
*/
|
|
||||||
#ifndef HASH_BLOCK_DATA_ORDER
|
#ifndef HASH_BLOCK_DATA_ORDER
|
||||||
#error "HASH_BLOCK_DATA_ORDER must be defined!"
|
#error "HASH_BLOCK_DATA_ORDER must be defined!"
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef HASH_LBLOCK
|
|
||||||
#define HASH_LBLOCK (HASH_CBLOCK/4)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef HASH_LONG_LOG2
|
|
||||||
#define HASH_LONG_LOG2 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Engage compiler specific rotate intrinsic function if available.
|
* Engage compiler specific rotate intrinsic function if available.
|
||||||
@@ -206,7 +169,8 @@
|
|||||||
: "cc"); \
|
: "cc"); \
|
||||||
ret; \
|
ret; \
|
||||||
})
|
})
|
||||||
# elif defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
|
# elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
|
||||||
|
defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
|
||||||
# define ROTATE(a,n) ({ register unsigned int ret; \
|
# define ROTATE(a,n) ({ register unsigned int ret; \
|
||||||
asm ( \
|
asm ( \
|
||||||
"rlwinm %0,%1,%2,0,31" \
|
"rlwinm %0,%1,%2,0,31" \
|
||||||
@@ -214,80 +178,28 @@
|
|||||||
: "r"(a), "I"(n)); \
|
: "r"(a), "I"(n)); \
|
||||||
ret; \
|
ret; \
|
||||||
})
|
})
|
||||||
|
# elif defined(__s390x__)
|
||||||
|
# define ROTATE(a,n) ({ register unsigned int ret; \
|
||||||
|
asm ("rll %0,%1,%2" \
|
||||||
|
: "=r"(ret) \
|
||||||
|
: "r"(a), "I"(n)); \
|
||||||
|
ret; \
|
||||||
|
})
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
#endif /* PEDANTIC */
|
#endif /* PEDANTIC */
|
||||||
|
|
||||||
#if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */
|
|
||||||
/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
|
|
||||||
#ifdef ROTATE
|
|
||||||
/* 5 instructions with rotate instruction, else 9 */
|
|
||||||
#define REVERSE_FETCH32(a,l) ( \
|
|
||||||
l=*(const HASH_LONG *)(a), \
|
|
||||||
((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24))) \
|
|
||||||
)
|
|
||||||
#else
|
|
||||||
/* 6 instructions with rotate instruction, else 8 */
|
|
||||||
#define REVERSE_FETCH32(a,l) ( \
|
|
||||||
l=*(const HASH_LONG *)(a), \
|
|
||||||
l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)), \
|
|
||||||
ROTATE(l,16) \
|
|
||||||
)
|
|
||||||
/*
|
|
||||||
* Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
|
|
||||||
* It's rewritten as above for two reasons:
|
|
||||||
* - RISCs aren't good at long constants and have to explicitly
|
|
||||||
* compose 'em with several (well, usually 2) instructions in a
|
|
||||||
* register before performing the actual operation and (as you
|
|
||||||
* already realized:-) having same constant should inspire the
|
|
||||||
* compiler to permanently allocate the only register for it;
|
|
||||||
* - most modern CPUs have two ALUs, but usually only one has
|
|
||||||
* circuitry for shifts:-( this minor tweak inspires compiler
|
|
||||||
* to schedule shift instructions in a better way...
|
|
||||||
*
|
|
||||||
* <appro@fy.chalmers.se>
|
|
||||||
*/
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef ROTATE
|
#ifndef ROTATE
|
||||||
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
|
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
|
|
||||||
* and HASH_BLOCK_HOST_ORDER ought to be the same if input data
|
|
||||||
* and host are of the same "endianess". It's possible to mask
|
|
||||||
* this with blank #define HASH_BLOCK_DATA_ORDER though...
|
|
||||||
*
|
|
||||||
* <appro@fy.chalmers.se>
|
|
||||||
*/
|
|
||||||
#if defined(B_ENDIAN)
|
|
||||||
# if defined(DATA_ORDER_IS_BIG_ENDIAN)
|
|
||||||
# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
|
|
||||||
# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#elif defined(L_ENDIAN)
|
|
||||||
# if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
|
||||||
# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
|
|
||||||
# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
|
|
||||||
#ifndef HASH_BLOCK_DATA_ORDER
|
|
||||||
#error "HASH_BLOCK_DATA_ORDER must be defined!"
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
|
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
|
||||||
|
|
||||||
#ifndef PEDANTIC
|
#ifndef PEDANTIC
|
||||||
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
|
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
|
||||||
# if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
|
# if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
|
||||||
(defined(__x86_64) || defined(__x86_64__))
|
(defined(__x86_64) || defined(__x86_64__))
|
||||||
|
# if !defined(B_ENDIAN)
|
||||||
/*
|
/*
|
||||||
* This gives ~30-40% performance improvement in SHA-256 compiled
|
* This gives ~30-40% performance improvement in SHA-256 compiled
|
||||||
* with gcc [on P4]. Well, first macro to be frank. We can pull
|
* with gcc [on P4]. Well, first macro to be frank. We can pull
|
||||||
@@ -302,6 +214,11 @@
|
|||||||
*((unsigned int *)(c))=r; (c)+=4; r; })
|
*((unsigned int *)(c))=r; (c)+=4; r; })
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
#if defined(__s390__) || defined(__s390x__)
|
||||||
|
# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
|
||||||
|
# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef HOST_c2l
|
#ifndef HOST_c2l
|
||||||
@@ -311,29 +228,6 @@
|
|||||||
l|=(((unsigned long)(*((c)++))) ), \
|
l|=(((unsigned long)(*((c)++))) ), \
|
||||||
l)
|
l)
|
||||||
#endif
|
#endif
|
||||||
#define HOST_p_c2l(c,l,n) { \
|
|
||||||
switch (n) { \
|
|
||||||
case 0: l =((unsigned long)(*((c)++)))<<24; \
|
|
||||||
case 1: l|=((unsigned long)(*((c)++)))<<16; \
|
|
||||||
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
|
|
||||||
case 3: l|=((unsigned long)(*((c)++))); \
|
|
||||||
} }
|
|
||||||
#define HOST_p_c2l_p(c,l,sc,len) { \
|
|
||||||
switch (sc) { \
|
|
||||||
case 0: l =((unsigned long)(*((c)++)))<<24; \
|
|
||||||
if (--len == 0) break; \
|
|
||||||
case 1: l|=((unsigned long)(*((c)++)))<<16; \
|
|
||||||
if (--len == 0) break; \
|
|
||||||
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
|
|
||||||
} }
|
|
||||||
/* NOTE the pointer is not incremented at the end of this */
|
|
||||||
#define HOST_c2l_p(c,l,n) { \
|
|
||||||
l=0; (c)+=n; \
|
|
||||||
switch (n) { \
|
|
||||||
case 3: l =((unsigned long)(*(--(c))))<< 8; \
|
|
||||||
case 2: l|=((unsigned long)(*(--(c))))<<16; \
|
|
||||||
case 1: l|=((unsigned long)(*(--(c))))<<24; \
|
|
||||||
} }
|
|
||||||
#ifndef HOST_l2c
|
#ifndef HOST_l2c
|
||||||
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
|
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
|
||||||
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
|
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
|
||||||
@@ -344,6 +238,18 @@
|
|||||||
|
|
||||||
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
||||||
|
|
||||||
|
#ifndef PEDANTIC
|
||||||
|
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
|
||||||
|
# if defined(__s390x__)
|
||||||
|
# define HOST_c2l(c,l) ({ asm ("lrv %0,0(%1)" \
|
||||||
|
:"=r"(l) : "r"(c)); \
|
||||||
|
(c)+=4; (l); })
|
||||||
|
# define HOST_l2c(l,c) ({ asm ("strv %0,0(%1)" \
|
||||||
|
: : "r"(l),"r"(c) : "memory"); \
|
||||||
|
(c)+=4; (l); })
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
|
#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
|
||||||
# ifndef B_ENDIAN
|
# ifndef B_ENDIAN
|
||||||
/* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
|
/* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
|
||||||
@@ -359,29 +265,6 @@
|
|||||||
l|=(((unsigned long)(*((c)++)))<<24), \
|
l|=(((unsigned long)(*((c)++)))<<24), \
|
||||||
l)
|
l)
|
||||||
#endif
|
#endif
|
||||||
#define HOST_p_c2l(c,l,n) { \
|
|
||||||
switch (n) { \
|
|
||||||
case 0: l =((unsigned long)(*((c)++))); \
|
|
||||||
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
|
|
||||||
case 2: l|=((unsigned long)(*((c)++)))<<16; \
|
|
||||||
case 3: l|=((unsigned long)(*((c)++)))<<24; \
|
|
||||||
} }
|
|
||||||
#define HOST_p_c2l_p(c,l,sc,len) { \
|
|
||||||
switch (sc) { \
|
|
||||||
case 0: l =((unsigned long)(*((c)++))); \
|
|
||||||
if (--len == 0) break; \
|
|
||||||
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
|
|
||||||
if (--len == 0) break; \
|
|
||||||
case 2: l|=((unsigned long)(*((c)++)))<<16; \
|
|
||||||
} }
|
|
||||||
/* NOTE the pointer is not incremented at the end of this */
|
|
||||||
#define HOST_c2l_p(c,l,n) { \
|
|
||||||
l=0; (c)+=n; \
|
|
||||||
switch (n) { \
|
|
||||||
case 3: l =((unsigned long)(*(--(c))))<<16; \
|
|
||||||
case 2: l|=((unsigned long)(*(--(c))))<< 8; \
|
|
||||||
case 1: l|=((unsigned long)(*(--(c)))); \
|
|
||||||
} }
|
|
||||||
#ifndef HOST_l2c
|
#ifndef HOST_l2c
|
||||||
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
|
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
|
||||||
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
|
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
|
||||||
@@ -399,9 +282,9 @@
|
|||||||
int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
|
int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
|
||||||
{
|
{
|
||||||
const unsigned char *data=data_;
|
const unsigned char *data=data_;
|
||||||
register HASH_LONG * p;
|
unsigned char *p;
|
||||||
register HASH_LONG l;
|
HASH_LONG l;
|
||||||
size_t sw,sc,ew,ec;
|
size_t n;
|
||||||
|
|
||||||
if (len==0) return 1;
|
if (len==0) return 1;
|
||||||
|
|
||||||
@@ -413,101 +296,43 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
|
|||||||
c->Nh+=(len>>29); /* might cause compiler warning on 16-bit */
|
c->Nh+=(len>>29); /* might cause compiler warning on 16-bit */
|
||||||
c->Nl=l;
|
c->Nl=l;
|
||||||
|
|
||||||
if (c->num != 0)
|
n = c->num;
|
||||||
|
if (n != 0)
|
||||||
{
|
{
|
||||||
p=c->data;
|
p=(unsigned char *)c->data;
|
||||||
sw=c->num>>2;
|
|
||||||
sc=c->num&0x03;
|
|
||||||
|
|
||||||
if ((c->num+len) >= HASH_CBLOCK)
|
if ((n+len) >= HASH_CBLOCK)
|
||||||
{
|
{
|
||||||
l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
|
memcpy (p+n,data,HASH_CBLOCK-n);
|
||||||
for (; sw<HASH_LBLOCK; sw++)
|
HASH_BLOCK_DATA_ORDER (c,p,1);
|
||||||
{
|
n = HASH_CBLOCK-n;
|
||||||
HOST_c2l(data,l); p[sw]=l;
|
data += n;
|
||||||
}
|
len -= n;
|
||||||
HASH_BLOCK_HOST_ORDER (c,p,1);
|
c->num = 0;
|
||||||
len-=(HASH_CBLOCK-c->num);
|
memset (p,0,HASH_CBLOCK); /* keep it zeroed */
|
||||||
c->num=0;
|
|
||||||
/* drop through and do the rest */
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
c->num+=(unsigned int)len;
|
memcpy (p+n,data,len);
|
||||||
if ((sc+len) < 4) /* ugly, add char's to a word */
|
c->num += (unsigned int)len;
|
||||||
{
|
|
||||||
l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ew=(c->num>>2);
|
|
||||||
ec=(c->num&0x03);
|
|
||||||
if (sc)
|
|
||||||
l=p[sw];
|
|
||||||
HOST_p_c2l(data,l,sc);
|
|
||||||
p[sw++]=l;
|
|
||||||
for (; sw < ew; sw++)
|
|
||||||
{
|
|
||||||
HOST_c2l(data,l); p[sw]=l;
|
|
||||||
}
|
|
||||||
if (ec)
|
|
||||||
{
|
|
||||||
HOST_c2l_p(data,l,ec); p[sw]=l;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sw=len/HASH_CBLOCK;
|
n = len/HASH_CBLOCK;
|
||||||
if (sw > 0)
|
if (n > 0)
|
||||||
{
|
{
|
||||||
#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
|
HASH_BLOCK_DATA_ORDER (c,data,n);
|
||||||
/*
|
n *= HASH_CBLOCK;
|
||||||
* Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
|
data += n;
|
||||||
* only if sizeof(HASH_LONG)==4.
|
len -= n;
|
||||||
*/
|
|
||||||
if ((((size_t)data)%4) == 0)
|
|
||||||
{
|
|
||||||
/* data is properly aligned so that we can cast it: */
|
|
||||||
HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw);
|
|
||||||
sw*=HASH_CBLOCK;
|
|
||||||
data+=sw;
|
|
||||||
len-=sw;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#if !defined(HASH_BLOCK_DATA_ORDER)
|
|
||||||
while (sw--)
|
|
||||||
{
|
|
||||||
memcpy (p=c->data,data,HASH_CBLOCK);
|
|
||||||
HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1);
|
|
||||||
data+=HASH_CBLOCK;
|
|
||||||
len-=HASH_CBLOCK;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#if defined(HASH_BLOCK_DATA_ORDER)
|
|
||||||
{
|
|
||||||
HASH_BLOCK_DATA_ORDER(c,data,sw);
|
|
||||||
sw*=HASH_CBLOCK;
|
|
||||||
data+=sw;
|
|
||||||
len-=sw;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (len!=0)
|
if (len != 0)
|
||||||
{
|
{
|
||||||
p = c->data;
|
p = (unsigned char *)c->data;
|
||||||
c->num = len;
|
c->num = len;
|
||||||
ew=len>>2; /* words to copy */
|
memcpy (p,data,len);
|
||||||
ec=len&0x03;
|
|
||||||
for (; ew; ew--,p++)
|
|
||||||
{
|
|
||||||
HOST_c2l(data,l); *p=l;
|
|
||||||
}
|
|
||||||
HOST_c2l_p(data,l,ec);
|
|
||||||
*p=l;
|
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -515,73 +340,38 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
|
|||||||
|
|
||||||
void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
|
void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
|
||||||
{
|
{
|
||||||
#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
|
|
||||||
if ((((size_t)data)%4) == 0)
|
|
||||||
/* data is properly aligned so that we can cast it: */
|
|
||||||
HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1);
|
|
||||||
else
|
|
||||||
#if !defined(HASH_BLOCK_DATA_ORDER)
|
|
||||||
{
|
|
||||||
memcpy (c->data,data,HASH_CBLOCK);
|
|
||||||
HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#if defined(HASH_BLOCK_DATA_ORDER)
|
|
||||||
HASH_BLOCK_DATA_ORDER (c,data,1);
|
HASH_BLOCK_DATA_ORDER (c,data,1);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int HASH_FINAL (unsigned char *md, HASH_CTX *c)
|
int HASH_FINAL (unsigned char *md, HASH_CTX *c)
|
||||||
{
|
{
|
||||||
register HASH_LONG *p;
|
unsigned char *p = (unsigned char *)c->data;
|
||||||
register unsigned long l;
|
size_t n = c->num;
|
||||||
register int i,j;
|
|
||||||
static const unsigned char end[4]={0x80,0x00,0x00,0x00};
|
|
||||||
const unsigned char *cp=end;
|
|
||||||
|
|
||||||
/* c->num should definitly have room for at least one more byte. */
|
p[n] = 0x80; /* there is always room for one */
|
||||||
p=c->data;
|
n++;
|
||||||
i=c->num>>2;
|
|
||||||
j=c->num&0x03;
|
|
||||||
|
|
||||||
#if 0
|
if (n > (HASH_CBLOCK-8))
|
||||||
/* purify often complains about the following line as an
|
|
||||||
* Uninitialized Memory Read. While this can be true, the
|
|
||||||
* following p_c2l macro will reset l when that case is true.
|
|
||||||
* This is because j&0x03 contains the number of 'valid' bytes
|
|
||||||
* already in p[i]. If and only if j&0x03 == 0, the UMR will
|
|
||||||
* occur but this is also the only time p_c2l will do
|
|
||||||
* l= *(cp++) instead of l|= *(cp++)
|
|
||||||
* Many thanks to Alex Tang <altitude@cic.net> for pickup this
|
|
||||||
* 'potential bug' */
|
|
||||||
#ifdef PURIFY
|
|
||||||
if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */
|
|
||||||
#endif
|
|
||||||
l=p[i];
|
|
||||||
#else
|
|
||||||
l = (j==0) ? 0 : p[i];
|
|
||||||
#endif
|
|
||||||
HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
|
|
||||||
|
|
||||||
if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */
|
|
||||||
{
|
{
|
||||||
if (i<HASH_LBLOCK) p[i]=0;
|
memset (p+n,0,HASH_CBLOCK-n);
|
||||||
HASH_BLOCK_HOST_ORDER (c,p,1);
|
n=0;
|
||||||
i=0;
|
HASH_BLOCK_DATA_ORDER (c,p,1);
|
||||||
}
|
}
|
||||||
for (; i<(HASH_LBLOCK-2); i++)
|
memset (p+n,0,HASH_CBLOCK-8-n);
|
||||||
p[i]=0;
|
|
||||||
|
|
||||||
|
p += HASH_CBLOCK-8;
|
||||||
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
|
#if defined(DATA_ORDER_IS_BIG_ENDIAN)
|
||||||
p[HASH_LBLOCK-2]=c->Nh;
|
(void)HOST_l2c(c->Nh,p);
|
||||||
p[HASH_LBLOCK-1]=c->Nl;
|
(void)HOST_l2c(c->Nl,p);
|
||||||
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
|
||||||
p[HASH_LBLOCK-2]=c->Nl;
|
(void)HOST_l2c(c->Nl,p);
|
||||||
p[HASH_LBLOCK-1]=c->Nh;
|
(void)HOST_l2c(c->Nh,p);
|
||||||
#endif
|
#endif
|
||||||
HASH_BLOCK_HOST_ORDER (c,p,1);
|
p -= HASH_CBLOCK;
|
||||||
|
HASH_BLOCK_DATA_ORDER (c,p,1);
|
||||||
|
c->num=0;
|
||||||
|
memset (p,0,HASH_CBLOCK);
|
||||||
|
|
||||||
#ifndef HASH_MAKE_STRING
|
#ifndef HASH_MAKE_STRING
|
||||||
#error "HASH_MAKE_STRING must be defined!"
|
#error "HASH_MAKE_STRING must be defined!"
|
||||||
@@ -589,11 +379,6 @@ int HASH_FINAL (unsigned char *md, HASH_CTX *c)
|
|||||||
HASH_MAKE_STRING(c,md);
|
HASH_MAKE_STRING(c,md);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
c->num=0;
|
|
||||||
/* clear stuff, HASH_BLOCK may be leaving some stuff on the stack
|
|
||||||
* but I'm not worried :-)
|
|
||||||
OPENSSL_cleanse((void *)c,sizeof(HASH_CTX));
|
|
||||||
*/
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -82,79 +82,6 @@ int MD4_Init(MD4_CTX *c)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef md4_block_host_order
|
|
||||||
void md4_block_host_order (MD4_CTX *c, const void *data, size_t num)
|
|
||||||
{
|
|
||||||
const MD4_LONG *X=data;
|
|
||||||
register unsigned MD32_REG_T A,B,C,D;
|
|
||||||
|
|
||||||
A=c->A;
|
|
||||||
B=c->B;
|
|
||||||
C=c->C;
|
|
||||||
D=c->D;
|
|
||||||
|
|
||||||
for (;num--;X+=HASH_LBLOCK)
|
|
||||||
{
|
|
||||||
/* Round 0 */
|
|
||||||
R0(A,B,C,D,X[ 0], 3,0);
|
|
||||||
R0(D,A,B,C,X[ 1], 7,0);
|
|
||||||
R0(C,D,A,B,X[ 2],11,0);
|
|
||||||
R0(B,C,D,A,X[ 3],19,0);
|
|
||||||
R0(A,B,C,D,X[ 4], 3,0);
|
|
||||||
R0(D,A,B,C,X[ 5], 7,0);
|
|
||||||
R0(C,D,A,B,X[ 6],11,0);
|
|
||||||
R0(B,C,D,A,X[ 7],19,0);
|
|
||||||
R0(A,B,C,D,X[ 8], 3,0);
|
|
||||||
R0(D,A,B,C,X[ 9], 7,0);
|
|
||||||
R0(C,D,A,B,X[10],11,0);
|
|
||||||
R0(B,C,D,A,X[11],19,0);
|
|
||||||
R0(A,B,C,D,X[12], 3,0);
|
|
||||||
R0(D,A,B,C,X[13], 7,0);
|
|
||||||
R0(C,D,A,B,X[14],11,0);
|
|
||||||
R0(B,C,D,A,X[15],19,0);
|
|
||||||
/* Round 1 */
|
|
||||||
R1(A,B,C,D,X[ 0], 3,0x5A827999L);
|
|
||||||
R1(D,A,B,C,X[ 4], 5,0x5A827999L);
|
|
||||||
R1(C,D,A,B,X[ 8], 9,0x5A827999L);
|
|
||||||
R1(B,C,D,A,X[12],13,0x5A827999L);
|
|
||||||
R1(A,B,C,D,X[ 1], 3,0x5A827999L);
|
|
||||||
R1(D,A,B,C,X[ 5], 5,0x5A827999L);
|
|
||||||
R1(C,D,A,B,X[ 9], 9,0x5A827999L);
|
|
||||||
R1(B,C,D,A,X[13],13,0x5A827999L);
|
|
||||||
R1(A,B,C,D,X[ 2], 3,0x5A827999L);
|
|
||||||
R1(D,A,B,C,X[ 6], 5,0x5A827999L);
|
|
||||||
R1(C,D,A,B,X[10], 9,0x5A827999L);
|
|
||||||
R1(B,C,D,A,X[14],13,0x5A827999L);
|
|
||||||
R1(A,B,C,D,X[ 3], 3,0x5A827999L);
|
|
||||||
R1(D,A,B,C,X[ 7], 5,0x5A827999L);
|
|
||||||
R1(C,D,A,B,X[11], 9,0x5A827999L);
|
|
||||||
R1(B,C,D,A,X[15],13,0x5A827999L);
|
|
||||||
/* Round 2 */
|
|
||||||
R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1);
|
|
||||||
R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1);
|
|
||||||
R2(C,D,A,B,X[ 4],11,0x6ED9EBA1);
|
|
||||||
R2(B,C,D,A,X[12],15,0x6ED9EBA1);
|
|
||||||
R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1);
|
|
||||||
R2(D,A,B,C,X[10], 9,0x6ED9EBA1);
|
|
||||||
R2(C,D,A,B,X[ 6],11,0x6ED9EBA1);
|
|
||||||
R2(B,C,D,A,X[14],15,0x6ED9EBA1);
|
|
||||||
R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1);
|
|
||||||
R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1);
|
|
||||||
R2(C,D,A,B,X[ 5],11,0x6ED9EBA1);
|
|
||||||
R2(B,C,D,A,X[13],15,0x6ED9EBA1);
|
|
||||||
R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1);
|
|
||||||
R2(D,A,B,C,X[11], 9,0x6ED9EBA1);
|
|
||||||
R2(C,D,A,B,X[ 7],11,0x6ED9EBA1);
|
|
||||||
R2(B,C,D,A,X[15],15,0x6ED9EBA1);
|
|
||||||
|
|
||||||
A = c->A += A;
|
|
||||||
B = c->B += B;
|
|
||||||
C = c->C += C;
|
|
||||||
D = c->D += D;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef md4_block_data_order
|
#ifndef md4_block_data_order
|
||||||
#ifdef X
|
#ifdef X
|
||||||
#undef X
|
#undef X
|
||||||
@@ -240,19 +167,3 @@ void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef undef
|
|
||||||
int printit(unsigned long *l)
|
|
||||||
{
|
|
||||||
int i,ii;
|
|
||||||
|
|
||||||
for (i=0; i<2; i++)
|
|
||||||
{
|
|
||||||
for (ii=0; ii<8; ii++)
|
|
||||||
{
|
|
||||||
fprintf(stderr,"%08lx ",l[i*8+ii]);
|
|
||||||
}
|
|
||||||
fprintf(stderr,"\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
@@ -65,43 +65,13 @@
|
|||||||
#define MD4_LONG_LOG2 2 /* default to 32 bits */
|
#define MD4_LONG_LOG2 2 /* default to 32 bits */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void md4_block_host_order (MD4_CTX *c, const void *p,size_t num);
|
|
||||||
void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
|
void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
|
||||||
|
|
||||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
|
|
||||||
# if !defined(B_ENDIAN)
|
|
||||||
/*
|
|
||||||
* *_block_host_order is expected to handle aligned data while
|
|
||||||
* *_block_data_order - unaligned. As algorithm and host (x86)
|
|
||||||
* are in this case of the same "endianness" these two are
|
|
||||||
* otherwise indistinguishable. But normally you don't want to
|
|
||||||
* call the same function because unaligned access in places
|
|
||||||
* where alignment is expected is usually a "Bad Thing". Indeed,
|
|
||||||
* on RISCs you get punished with BUS ERROR signal or *severe*
|
|
||||||
* performance degradation. Intel CPUs are in turn perfectly
|
|
||||||
* capable of loading unaligned data without such drastic side
|
|
||||||
* effect. Yes, they say it's slower than aligned load, but no
|
|
||||||
* exception is generated and therefore performance degradation
|
|
||||||
* is *incomparable* with RISCs. What we should weight here is
|
|
||||||
* costs of unaligned access against costs of aligning data.
|
|
||||||
* According to my measurements allowing unaligned access results
|
|
||||||
* in ~9% performance improvement on Pentium II operating at
|
|
||||||
* 266MHz. I won't be surprised if the difference will be higher
|
|
||||||
* on faster systems:-)
|
|
||||||
*
|
|
||||||
* <appro@fy.chalmers.se>
|
|
||||||
*/
|
|
||||||
# define md4_block_data_order md4_block_host_order
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define DATA_ORDER_IS_LITTLE_ENDIAN
|
#define DATA_ORDER_IS_LITTLE_ENDIAN
|
||||||
|
|
||||||
#define HASH_LONG MD4_LONG
|
#define HASH_LONG MD4_LONG
|
||||||
#define HASH_LONG_LOG2 MD4_LONG_LOG2
|
|
||||||
#define HASH_CTX MD4_CTX
|
#define HASH_CTX MD4_CTX
|
||||||
#define HASH_CBLOCK MD4_CBLOCK
|
#define HASH_CBLOCK MD4_CBLOCK
|
||||||
#define HASH_LBLOCK MD4_LBLOCK
|
|
||||||
#define HASH_UPDATE MD4_Update
|
#define HASH_UPDATE MD4_Update
|
||||||
#define HASH_TRANSFORM MD4_Transform
|
#define HASH_TRANSFORM MD4_Transform
|
||||||
#define HASH_FINAL MD4_Final
|
#define HASH_FINAL MD4_Final
|
||||||
@@ -112,21 +82,7 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
|
|||||||
ll=(c)->C; HOST_l2c(ll,(s)); \
|
ll=(c)->C; HOST_l2c(ll,(s)); \
|
||||||
ll=(c)->D; HOST_l2c(ll,(s)); \
|
ll=(c)->D; HOST_l2c(ll,(s)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#define HASH_BLOCK_HOST_ORDER md4_block_host_order
|
|
||||||
#if !defined(L_ENDIAN) || defined(md4_block_data_order)
|
|
||||||
#define HASH_BLOCK_DATA_ORDER md4_block_data_order
|
#define HASH_BLOCK_DATA_ORDER md4_block_data_order
|
||||||
/*
|
|
||||||
* Little-endians (Intel and Alpha) feel better without this.
|
|
||||||
* It looks like memcpy does better job than generic
|
|
||||||
* md4_block_data_order on copying-n-aligning input data.
|
|
||||||
* But frankly speaking I didn't expect such result on Alpha.
|
|
||||||
* On the other hand I've got this with egcs-1.0.2 and if
|
|
||||||
* program is compiled with another (better?) compiler it
|
|
||||||
* might turn out other way around.
|
|
||||||
*
|
|
||||||
* <appro@fy.chalmers.se>
|
|
||||||
*/
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "md32_common.h"
|
#include "md32_common.h"
|
||||||
|
|
||||||
|
@@ -52,24 +52,6 @@ mx86-cof.s: asm/md5-586.pl ../perlasm/x86asm.pl
|
|||||||
mx86-out.s: asm/md5-586.pl ../perlasm/x86asm.pl
|
mx86-out.s: asm/md5-586.pl ../perlasm/x86asm.pl
|
||||||
(cd asm; $(PERL) md5-586.pl a.out $(CFLAGS) > ../$@)
|
(cd asm; $(PERL) md5-586.pl a.out $(CFLAGS) > ../$@)
|
||||||
|
|
||||||
md5-sparcv8plus.o: asm/md5-sparcv9.S
|
|
||||||
$(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -c \
|
|
||||||
-o md5-sparcv8plus.o asm/md5-sparcv9.S
|
|
||||||
|
|
||||||
# Old GNU assembler doesn't understand V9 instructions, so we
|
|
||||||
# hire /usr/ccs/bin/as to do the job. Note that option is called
|
|
||||||
# *-gcc27, but even gcc 2>=8 users may experience similar problem
|
|
||||||
# if they didn't bother to upgrade GNU assembler. Such users should
|
|
||||||
# not choose this option, but be adviced to *remove* GNU assembler
|
|
||||||
# or upgrade it.
|
|
||||||
md5-sparcv8plus-gcc27.o: asm/md5-sparcv9.S
|
|
||||||
$(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -E asm/md5-sparcv9.S | \
|
|
||||||
/usr/ccs/bin/as -xarch=v8plus - -o md5-sparcv8plus-gcc27.o
|
|
||||||
|
|
||||||
md5-sparcv9.o: asm/md5-sparcv9.S
|
|
||||||
$(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -c \
|
|
||||||
-o md5-sparcv9.o asm/md5-sparcv9.S
|
|
||||||
|
|
||||||
md5-x86_64.s: asm/md5-x86_64.pl; $(PERL) asm/md5-x86_64.pl $@
|
md5-x86_64.s: asm/md5-x86_64.pl; $(PERL) asm/md5-x86_64.pl $@
|
||||||
|
|
||||||
files:
|
files:
|
||||||
|
@@ -29,7 +29,7 @@ $X="esi";
|
|||||||
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3
|
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3
|
||||||
);
|
);
|
||||||
|
|
||||||
&md5_block("md5_block_asm_host_order");
|
&md5_block("md5_block_asm_data_order");
|
||||||
&asm_finish();
|
&asm_finish();
|
||||||
|
|
||||||
sub Np
|
sub Np
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -111,9 +111,9 @@ $code .= <<EOF;
|
|||||||
.text
|
.text
|
||||||
.align 16
|
.align 16
|
||||||
|
|
||||||
.globl md5_block_asm_host_order
|
.globl md5_block_asm_data_order
|
||||||
.type md5_block_asm_host_order,\@function,3
|
.type md5_block_asm_data_order,\@function,3
|
||||||
md5_block_asm_host_order:
|
md5_block_asm_data_order:
|
||||||
push %rbp
|
push %rbp
|
||||||
push %rbx
|
push %rbx
|
||||||
push %r14
|
push %r14
|
||||||
@@ -237,7 +237,7 @@ $code .= <<EOF;
|
|||||||
pop %rbx
|
pop %rbx
|
||||||
pop %rbp
|
pop %rbp
|
||||||
ret
|
ret
|
||||||
.size md5_block_asm_host_order,.-md5_block_asm_host_order
|
.size md5_block_asm_data_order,.-md5_block_asm_data_order
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
print $code;
|
print $code;
|
||||||
|
@@ -82,96 +82,6 @@ int MD5_Init(MD5_CTX *c)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef md5_block_host_order
|
|
||||||
void md5_block_host_order (MD5_CTX *c, const void *data, size_t num)
|
|
||||||
{
|
|
||||||
const MD5_LONG *X=data;
|
|
||||||
register unsigned MD32_REG_T A,B,C,D;
|
|
||||||
|
|
||||||
A=c->A;
|
|
||||||
B=c->B;
|
|
||||||
C=c->C;
|
|
||||||
D=c->D;
|
|
||||||
|
|
||||||
for (;num--;X+=HASH_LBLOCK)
|
|
||||||
{
|
|
||||||
/* Round 0 */
|
|
||||||
R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
|
|
||||||
R0(D,A,B,C,X[ 1],12,0xe8c7b756L);
|
|
||||||
R0(C,D,A,B,X[ 2],17,0x242070dbL);
|
|
||||||
R0(B,C,D,A,X[ 3],22,0xc1bdceeeL);
|
|
||||||
R0(A,B,C,D,X[ 4], 7,0xf57c0fafL);
|
|
||||||
R0(D,A,B,C,X[ 5],12,0x4787c62aL);
|
|
||||||
R0(C,D,A,B,X[ 6],17,0xa8304613L);
|
|
||||||
R0(B,C,D,A,X[ 7],22,0xfd469501L);
|
|
||||||
R0(A,B,C,D,X[ 8], 7,0x698098d8L);
|
|
||||||
R0(D,A,B,C,X[ 9],12,0x8b44f7afL);
|
|
||||||
R0(C,D,A,B,X[10],17,0xffff5bb1L);
|
|
||||||
R0(B,C,D,A,X[11],22,0x895cd7beL);
|
|
||||||
R0(A,B,C,D,X[12], 7,0x6b901122L);
|
|
||||||
R0(D,A,B,C,X[13],12,0xfd987193L);
|
|
||||||
R0(C,D,A,B,X[14],17,0xa679438eL);
|
|
||||||
R0(B,C,D,A,X[15],22,0x49b40821L);
|
|
||||||
/* Round 1 */
|
|
||||||
R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
|
|
||||||
R1(D,A,B,C,X[ 6], 9,0xc040b340L);
|
|
||||||
R1(C,D,A,B,X[11],14,0x265e5a51L);
|
|
||||||
R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
|
|
||||||
R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
|
|
||||||
R1(D,A,B,C,X[10], 9,0x02441453L);
|
|
||||||
R1(C,D,A,B,X[15],14,0xd8a1e681L);
|
|
||||||
R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
|
|
||||||
R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
|
|
||||||
R1(D,A,B,C,X[14], 9,0xc33707d6L);
|
|
||||||
R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
|
|
||||||
R1(B,C,D,A,X[ 8],20,0x455a14edL);
|
|
||||||
R1(A,B,C,D,X[13], 5,0xa9e3e905L);
|
|
||||||
R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
|
|
||||||
R1(C,D,A,B,X[ 7],14,0x676f02d9L);
|
|
||||||
R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
|
|
||||||
/* Round 2 */
|
|
||||||
R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
|
|
||||||
R2(D,A,B,C,X[ 8],11,0x8771f681L);
|
|
||||||
R2(C,D,A,B,X[11],16,0x6d9d6122L);
|
|
||||||
R2(B,C,D,A,X[14],23,0xfde5380cL);
|
|
||||||
R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
|
|
||||||
R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
|
|
||||||
R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
|
|
||||||
R2(B,C,D,A,X[10],23,0xbebfbc70L);
|
|
||||||
R2(A,B,C,D,X[13], 4,0x289b7ec6L);
|
|
||||||
R2(D,A,B,C,X[ 0],11,0xeaa127faL);
|
|
||||||
R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
|
|
||||||
R2(B,C,D,A,X[ 6],23,0x04881d05L);
|
|
||||||
R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
|
|
||||||
R2(D,A,B,C,X[12],11,0xe6db99e5L);
|
|
||||||
R2(C,D,A,B,X[15],16,0x1fa27cf8L);
|
|
||||||
R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
|
|
||||||
/* Round 3 */
|
|
||||||
R3(A,B,C,D,X[ 0], 6,0xf4292244L);
|
|
||||||
R3(D,A,B,C,X[ 7],10,0x432aff97L);
|
|
||||||
R3(C,D,A,B,X[14],15,0xab9423a7L);
|
|
||||||
R3(B,C,D,A,X[ 5],21,0xfc93a039L);
|
|
||||||
R3(A,B,C,D,X[12], 6,0x655b59c3L);
|
|
||||||
R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
|
|
||||||
R3(C,D,A,B,X[10],15,0xffeff47dL);
|
|
||||||
R3(B,C,D,A,X[ 1],21,0x85845dd1L);
|
|
||||||
R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
|
|
||||||
R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
|
|
||||||
R3(C,D,A,B,X[ 6],15,0xa3014314L);
|
|
||||||
R3(B,C,D,A,X[13],21,0x4e0811a1L);
|
|
||||||
R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
|
|
||||||
R3(D,A,B,C,X[11],10,0xbd3af235L);
|
|
||||||
R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
|
|
||||||
R3(B,C,D,A,X[ 9],21,0xeb86d391L);
|
|
||||||
|
|
||||||
A = c->A += A;
|
|
||||||
B = c->B += B;
|
|
||||||
C = c->C += C;
|
|
||||||
D = c->D += D;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef md5_block_data_order
|
#ifndef md5_block_data_order
|
||||||
#ifdef X
|
#ifdef X
|
||||||
#undef X
|
#undef X
|
||||||
@@ -274,19 +184,3 @@ void md5_block_data_order (MD5_CTX *c, const void *data_, size_t num)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef undef
|
|
||||||
int printit(unsigned long *l)
|
|
||||||
{
|
|
||||||
int i,ii;
|
|
||||||
|
|
||||||
for (i=0; i<2; i++)
|
|
||||||
{
|
|
||||||
for (ii=0; ii<8; ii++)
|
|
||||||
{
|
|
||||||
fprintf(stderr,"%08lx ",l[i*8+ii]);
|
|
||||||
}
|
|
||||||
fprintf(stderr,"\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
@@ -66,53 +66,19 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MD5_ASM
|
#ifdef MD5_ASM
|
||||||
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__)
|
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
|
||||||
# if !defined(B_ENDIAN)
|
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
|
||||||
# define md5_block_host_order md5_block_asm_host_order
|
# define md5_block_data_order md5_block_asm_data_order
|
||||||
# endif
|
|
||||||
# elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC)
|
|
||||||
void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,size_t num);
|
|
||||||
# define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
|
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void md5_block_host_order (MD5_CTX *c, const void *p,size_t num);
|
|
||||||
void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);
|
void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);
|
||||||
|
|
||||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__)
|
|
||||||
# if !defined(B_ENDIAN)
|
|
||||||
/*
|
|
||||||
* *_block_host_order is expected to handle aligned data while
|
|
||||||
* *_block_data_order - unaligned. As algorithm and host (x86)
|
|
||||||
* are in this case of the same "endianness" these two are
|
|
||||||
* otherwise indistinguishable. But normally you don't want to
|
|
||||||
* call the same function because unaligned access in places
|
|
||||||
* where alignment is expected is usually a "Bad Thing". Indeed,
|
|
||||||
* on RISCs you get punished with BUS ERROR signal or *severe*
|
|
||||||
* performance degradation. Intel CPUs are in turn perfectly
|
|
||||||
* capable of loading unaligned data without such drastic side
|
|
||||||
* effect. Yes, they say it's slower than aligned load, but no
|
|
||||||
* exception is generated and therefore performance degradation
|
|
||||||
* is *incomparable* with RISCs. What we should weight here is
|
|
||||||
* costs of unaligned access against costs of aligning data.
|
|
||||||
* According to my measurements allowing unaligned access results
|
|
||||||
* in ~9% performance improvement on Pentium II operating at
|
|
||||||
* 266MHz. I won't be surprised if the difference will be higher
|
|
||||||
* on faster systems:-)
|
|
||||||
*
|
|
||||||
* <appro@fy.chalmers.se>
|
|
||||||
*/
|
|
||||||
# define md5_block_data_order md5_block_host_order
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define DATA_ORDER_IS_LITTLE_ENDIAN
|
#define DATA_ORDER_IS_LITTLE_ENDIAN
|
||||||
|
|
||||||
#define HASH_LONG MD5_LONG
|
#define HASH_LONG MD5_LONG
|
||||||
#define HASH_LONG_LOG2 MD5_LONG_LOG2
|
|
||||||
#define HASH_CTX MD5_CTX
|
#define HASH_CTX MD5_CTX
|
||||||
#define HASH_CBLOCK MD5_CBLOCK
|
#define HASH_CBLOCK MD5_CBLOCK
|
||||||
#define HASH_LBLOCK MD5_LBLOCK
|
|
||||||
#define HASH_UPDATE MD5_Update
|
#define HASH_UPDATE MD5_Update
|
||||||
#define HASH_TRANSFORM MD5_Transform
|
#define HASH_TRANSFORM MD5_Transform
|
||||||
#define HASH_FINAL MD5_Final
|
#define HASH_FINAL MD5_Final
|
||||||
@@ -123,21 +89,7 @@ void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);
|
|||||||
ll=(c)->C; HOST_l2c(ll,(s)); \
|
ll=(c)->C; HOST_l2c(ll,(s)); \
|
||||||
ll=(c)->D; HOST_l2c(ll,(s)); \
|
ll=(c)->D; HOST_l2c(ll,(s)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#define HASH_BLOCK_HOST_ORDER md5_block_host_order
|
|
||||||
#if !defined(L_ENDIAN) || defined(md5_block_data_order)
|
|
||||||
#define HASH_BLOCK_DATA_ORDER md5_block_data_order
|
#define HASH_BLOCK_DATA_ORDER md5_block_data_order
|
||||||
/*
|
|
||||||
* Little-endians (Intel and Alpha) feel better without this.
|
|
||||||
* It looks like memcpy does better job than generic
|
|
||||||
* md5_block_data_order on copying-n-aligning input data.
|
|
||||||
* But frankly speaking I didn't expect such result on Alpha.
|
|
||||||
* On the other hand I've got this with egcs-1.0.2 and if
|
|
||||||
* program is compiled with another (better?) compiler it
|
|
||||||
* might turn out other way around.
|
|
||||||
*
|
|
||||||
* <appro@fy.chalmers.se>
|
|
||||||
*/
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "md32_common.h"
|
#include "md32_common.h"
|
||||||
|
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/local/bin/perl
|
#!/usr/local/bin/perl
|
||||||
|
|
||||||
# Normal is the
|
# Normal is the
|
||||||
# ripemd160_block_asm_host_order(RIPEMD160_CTX *c, ULONG *X,int blocks);
|
# ripemd160_block_asm_data_order(RIPEMD160_CTX *c, ULONG *X,int blocks);
|
||||||
|
|
||||||
$normal=0;
|
$normal=0;
|
||||||
|
|
||||||
@@ -56,7 +56,7 @@ $KR3=0x7A6D76E9;
|
|||||||
8, 5,12, 9,12, 5,14, 6, 8,13, 6, 5,15,13,11,11,
|
8, 5,12, 9,12, 5,14, 6, 8,13, 6, 5,15,13,11,11,
|
||||||
);
|
);
|
||||||
|
|
||||||
&ripemd160_block("ripemd160_block_asm_host_order");
|
&ripemd160_block("ripemd160_block_asm_data_order");
|
||||||
&asm_finish();
|
&asm_finish();
|
||||||
|
|
||||||
sub Xv
|
sub Xv
|
||||||
|
@@ -82,207 +82,6 @@ int RIPEMD160_Init(RIPEMD160_CTX *c)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef ripemd160_block_host_order
|
|
||||||
#ifdef X
|
|
||||||
#undef X
|
|
||||||
#endif
|
|
||||||
#define X(i) XX[i]
|
|
||||||
void ripemd160_block_host_order (RIPEMD160_CTX *ctx, const void *p, size_t num)
|
|
||||||
{
|
|
||||||
const RIPEMD160_LONG *XX=p;
|
|
||||||
register unsigned MD32_REG_T A,B,C,D,E;
|
|
||||||
register unsigned MD32_REG_T a,b,c,d,e;
|
|
||||||
|
|
||||||
for (;num--;XX+=HASH_LBLOCK)
|
|
||||||
{
|
|
||||||
|
|
||||||
A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E;
|
|
||||||
|
|
||||||
RIP1(A,B,C,D,E,WL00,SL00);
|
|
||||||
RIP1(E,A,B,C,D,WL01,SL01);
|
|
||||||
RIP1(D,E,A,B,C,WL02,SL02);
|
|
||||||
RIP1(C,D,E,A,B,WL03,SL03);
|
|
||||||
RIP1(B,C,D,E,A,WL04,SL04);
|
|
||||||
RIP1(A,B,C,D,E,WL05,SL05);
|
|
||||||
RIP1(E,A,B,C,D,WL06,SL06);
|
|
||||||
RIP1(D,E,A,B,C,WL07,SL07);
|
|
||||||
RIP1(C,D,E,A,B,WL08,SL08);
|
|
||||||
RIP1(B,C,D,E,A,WL09,SL09);
|
|
||||||
RIP1(A,B,C,D,E,WL10,SL10);
|
|
||||||
RIP1(E,A,B,C,D,WL11,SL11);
|
|
||||||
RIP1(D,E,A,B,C,WL12,SL12);
|
|
||||||
RIP1(C,D,E,A,B,WL13,SL13);
|
|
||||||
RIP1(B,C,D,E,A,WL14,SL14);
|
|
||||||
RIP1(A,B,C,D,E,WL15,SL15);
|
|
||||||
|
|
||||||
RIP2(E,A,B,C,D,WL16,SL16,KL1);
|
|
||||||
RIP2(D,E,A,B,C,WL17,SL17,KL1);
|
|
||||||
RIP2(C,D,E,A,B,WL18,SL18,KL1);
|
|
||||||
RIP2(B,C,D,E,A,WL19,SL19,KL1);
|
|
||||||
RIP2(A,B,C,D,E,WL20,SL20,KL1);
|
|
||||||
RIP2(E,A,B,C,D,WL21,SL21,KL1);
|
|
||||||
RIP2(D,E,A,B,C,WL22,SL22,KL1);
|
|
||||||
RIP2(C,D,E,A,B,WL23,SL23,KL1);
|
|
||||||
RIP2(B,C,D,E,A,WL24,SL24,KL1);
|
|
||||||
RIP2(A,B,C,D,E,WL25,SL25,KL1);
|
|
||||||
RIP2(E,A,B,C,D,WL26,SL26,KL1);
|
|
||||||
RIP2(D,E,A,B,C,WL27,SL27,KL1);
|
|
||||||
RIP2(C,D,E,A,B,WL28,SL28,KL1);
|
|
||||||
RIP2(B,C,D,E,A,WL29,SL29,KL1);
|
|
||||||
RIP2(A,B,C,D,E,WL30,SL30,KL1);
|
|
||||||
RIP2(E,A,B,C,D,WL31,SL31,KL1);
|
|
||||||
|
|
||||||
RIP3(D,E,A,B,C,WL32,SL32,KL2);
|
|
||||||
RIP3(C,D,E,A,B,WL33,SL33,KL2);
|
|
||||||
RIP3(B,C,D,E,A,WL34,SL34,KL2);
|
|
||||||
RIP3(A,B,C,D,E,WL35,SL35,KL2);
|
|
||||||
RIP3(E,A,B,C,D,WL36,SL36,KL2);
|
|
||||||
RIP3(D,E,A,B,C,WL37,SL37,KL2);
|
|
||||||
RIP3(C,D,E,A,B,WL38,SL38,KL2);
|
|
||||||
RIP3(B,C,D,E,A,WL39,SL39,KL2);
|
|
||||||
RIP3(A,B,C,D,E,WL40,SL40,KL2);
|
|
||||||
RIP3(E,A,B,C,D,WL41,SL41,KL2);
|
|
||||||
RIP3(D,E,A,B,C,WL42,SL42,KL2);
|
|
||||||
RIP3(C,D,E,A,B,WL43,SL43,KL2);
|
|
||||||
RIP3(B,C,D,E,A,WL44,SL44,KL2);
|
|
||||||
RIP3(A,B,C,D,E,WL45,SL45,KL2);
|
|
||||||
RIP3(E,A,B,C,D,WL46,SL46,KL2);
|
|
||||||
RIP3(D,E,A,B,C,WL47,SL47,KL2);
|
|
||||||
|
|
||||||
RIP4(C,D,E,A,B,WL48,SL48,KL3);
|
|
||||||
RIP4(B,C,D,E,A,WL49,SL49,KL3);
|
|
||||||
RIP4(A,B,C,D,E,WL50,SL50,KL3);
|
|
||||||
RIP4(E,A,B,C,D,WL51,SL51,KL3);
|
|
||||||
RIP4(D,E,A,B,C,WL52,SL52,KL3);
|
|
||||||
RIP4(C,D,E,A,B,WL53,SL53,KL3);
|
|
||||||
RIP4(B,C,D,E,A,WL54,SL54,KL3);
|
|
||||||
RIP4(A,B,C,D,E,WL55,SL55,KL3);
|
|
||||||
RIP4(E,A,B,C,D,WL56,SL56,KL3);
|
|
||||||
RIP4(D,E,A,B,C,WL57,SL57,KL3);
|
|
||||||
RIP4(C,D,E,A,B,WL58,SL58,KL3);
|
|
||||||
RIP4(B,C,D,E,A,WL59,SL59,KL3);
|
|
||||||
RIP4(A,B,C,D,E,WL60,SL60,KL3);
|
|
||||||
RIP4(E,A,B,C,D,WL61,SL61,KL3);
|
|
||||||
RIP4(D,E,A,B,C,WL62,SL62,KL3);
|
|
||||||
RIP4(C,D,E,A,B,WL63,SL63,KL3);
|
|
||||||
|
|
||||||
RIP5(B,C,D,E,A,WL64,SL64,KL4);
|
|
||||||
RIP5(A,B,C,D,E,WL65,SL65,KL4);
|
|
||||||
RIP5(E,A,B,C,D,WL66,SL66,KL4);
|
|
||||||
RIP5(D,E,A,B,C,WL67,SL67,KL4);
|
|
||||||
RIP5(C,D,E,A,B,WL68,SL68,KL4);
|
|
||||||
RIP5(B,C,D,E,A,WL69,SL69,KL4);
|
|
||||||
RIP5(A,B,C,D,E,WL70,SL70,KL4);
|
|
||||||
RIP5(E,A,B,C,D,WL71,SL71,KL4);
|
|
||||||
RIP5(D,E,A,B,C,WL72,SL72,KL4);
|
|
||||||
RIP5(C,D,E,A,B,WL73,SL73,KL4);
|
|
||||||
RIP5(B,C,D,E,A,WL74,SL74,KL4);
|
|
||||||
RIP5(A,B,C,D,E,WL75,SL75,KL4);
|
|
||||||
RIP5(E,A,B,C,D,WL76,SL76,KL4);
|
|
||||||
RIP5(D,E,A,B,C,WL77,SL77,KL4);
|
|
||||||
RIP5(C,D,E,A,B,WL78,SL78,KL4);
|
|
||||||
RIP5(B,C,D,E,A,WL79,SL79,KL4);
|
|
||||||
|
|
||||||
a=A; b=B; c=C; d=D; e=E;
|
|
||||||
/* Do other half */
|
|
||||||
A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E;
|
|
||||||
|
|
||||||
RIP5(A,B,C,D,E,WR00,SR00,KR0);
|
|
||||||
RIP5(E,A,B,C,D,WR01,SR01,KR0);
|
|
||||||
RIP5(D,E,A,B,C,WR02,SR02,KR0);
|
|
||||||
RIP5(C,D,E,A,B,WR03,SR03,KR0);
|
|
||||||
RIP5(B,C,D,E,A,WR04,SR04,KR0);
|
|
||||||
RIP5(A,B,C,D,E,WR05,SR05,KR0);
|
|
||||||
RIP5(E,A,B,C,D,WR06,SR06,KR0);
|
|
||||||
RIP5(D,E,A,B,C,WR07,SR07,KR0);
|
|
||||||
RIP5(C,D,E,A,B,WR08,SR08,KR0);
|
|
||||||
RIP5(B,C,D,E,A,WR09,SR09,KR0);
|
|
||||||
RIP5(A,B,C,D,E,WR10,SR10,KR0);
|
|
||||||
RIP5(E,A,B,C,D,WR11,SR11,KR0);
|
|
||||||
RIP5(D,E,A,B,C,WR12,SR12,KR0);
|
|
||||||
RIP5(C,D,E,A,B,WR13,SR13,KR0);
|
|
||||||
RIP5(B,C,D,E,A,WR14,SR14,KR0);
|
|
||||||
RIP5(A,B,C,D,E,WR15,SR15,KR0);
|
|
||||||
|
|
||||||
RIP4(E,A,B,C,D,WR16,SR16,KR1);
|
|
||||||
RIP4(D,E,A,B,C,WR17,SR17,KR1);
|
|
||||||
RIP4(C,D,E,A,B,WR18,SR18,KR1);
|
|
||||||
RIP4(B,C,D,E,A,WR19,SR19,KR1);
|
|
||||||
RIP4(A,B,C,D,E,WR20,SR20,KR1);
|
|
||||||
RIP4(E,A,B,C,D,WR21,SR21,KR1);
|
|
||||||
RIP4(D,E,A,B,C,WR22,SR22,KR1);
|
|
||||||
RIP4(C,D,E,A,B,WR23,SR23,KR1);
|
|
||||||
RIP4(B,C,D,E,A,WR24,SR24,KR1);
|
|
||||||
RIP4(A,B,C,D,E,WR25,SR25,KR1);
|
|
||||||
RIP4(E,A,B,C,D,WR26,SR26,KR1);
|
|
||||||
RIP4(D,E,A,B,C,WR27,SR27,KR1);
|
|
||||||
RIP4(C,D,E,A,B,WR28,SR28,KR1);
|
|
||||||
RIP4(B,C,D,E,A,WR29,SR29,KR1);
|
|
||||||
RIP4(A,B,C,D,E,WR30,SR30,KR1);
|
|
||||||
RIP4(E,A,B,C,D,WR31,SR31,KR1);
|
|
||||||
|
|
||||||
RIP3(D,E,A,B,C,WR32,SR32,KR2);
|
|
||||||
RIP3(C,D,E,A,B,WR33,SR33,KR2);
|
|
||||||
RIP3(B,C,D,E,A,WR34,SR34,KR2);
|
|
||||||
RIP3(A,B,C,D,E,WR35,SR35,KR2);
|
|
||||||
RIP3(E,A,B,C,D,WR36,SR36,KR2);
|
|
||||||
RIP3(D,E,A,B,C,WR37,SR37,KR2);
|
|
||||||
RIP3(C,D,E,A,B,WR38,SR38,KR2);
|
|
||||||
RIP3(B,C,D,E,A,WR39,SR39,KR2);
|
|
||||||
RIP3(A,B,C,D,E,WR40,SR40,KR2);
|
|
||||||
RIP3(E,A,B,C,D,WR41,SR41,KR2);
|
|
||||||
RIP3(D,E,A,B,C,WR42,SR42,KR2);
|
|
||||||
RIP3(C,D,E,A,B,WR43,SR43,KR2);
|
|
||||||
RIP3(B,C,D,E,A,WR44,SR44,KR2);
|
|
||||||
RIP3(A,B,C,D,E,WR45,SR45,KR2);
|
|
||||||
RIP3(E,A,B,C,D,WR46,SR46,KR2);
|
|
||||||
RIP3(D,E,A,B,C,WR47,SR47,KR2);
|
|
||||||
|
|
||||||
RIP2(C,D,E,A,B,WR48,SR48,KR3);
|
|
||||||
RIP2(B,C,D,E,A,WR49,SR49,KR3);
|
|
||||||
RIP2(A,B,C,D,E,WR50,SR50,KR3);
|
|
||||||
RIP2(E,A,B,C,D,WR51,SR51,KR3);
|
|
||||||
RIP2(D,E,A,B,C,WR52,SR52,KR3);
|
|
||||||
RIP2(C,D,E,A,B,WR53,SR53,KR3);
|
|
||||||
RIP2(B,C,D,E,A,WR54,SR54,KR3);
|
|
||||||
RIP2(A,B,C,D,E,WR55,SR55,KR3);
|
|
||||||
RIP2(E,A,B,C,D,WR56,SR56,KR3);
|
|
||||||
RIP2(D,E,A,B,C,WR57,SR57,KR3);
|
|
||||||
RIP2(C,D,E,A,B,WR58,SR58,KR3);
|
|
||||||
RIP2(B,C,D,E,A,WR59,SR59,KR3);
|
|
||||||
RIP2(A,B,C,D,E,WR60,SR60,KR3);
|
|
||||||
RIP2(E,A,B,C,D,WR61,SR61,KR3);
|
|
||||||
RIP2(D,E,A,B,C,WR62,SR62,KR3);
|
|
||||||
RIP2(C,D,E,A,B,WR63,SR63,KR3);
|
|
||||||
|
|
||||||
RIP1(B,C,D,E,A,WR64,SR64);
|
|
||||||
RIP1(A,B,C,D,E,WR65,SR65);
|
|
||||||
RIP1(E,A,B,C,D,WR66,SR66);
|
|
||||||
RIP1(D,E,A,B,C,WR67,SR67);
|
|
||||||
RIP1(C,D,E,A,B,WR68,SR68);
|
|
||||||
RIP1(B,C,D,E,A,WR69,SR69);
|
|
||||||
RIP1(A,B,C,D,E,WR70,SR70);
|
|
||||||
RIP1(E,A,B,C,D,WR71,SR71);
|
|
||||||
RIP1(D,E,A,B,C,WR72,SR72);
|
|
||||||
RIP1(C,D,E,A,B,WR73,SR73);
|
|
||||||
RIP1(B,C,D,E,A,WR74,SR74);
|
|
||||||
RIP1(A,B,C,D,E,WR75,SR75);
|
|
||||||
RIP1(E,A,B,C,D,WR76,SR76);
|
|
||||||
RIP1(D,E,A,B,C,WR77,SR77);
|
|
||||||
RIP1(C,D,E,A,B,WR78,SR78);
|
|
||||||
RIP1(B,C,D,E,A,WR79,SR79);
|
|
||||||
|
|
||||||
D =ctx->B+c+D;
|
|
||||||
ctx->B=ctx->C+d+E;
|
|
||||||
ctx->C=ctx->D+e+A;
|
|
||||||
ctx->D=ctx->E+a+B;
|
|
||||||
ctx->E=ctx->A+b+C;
|
|
||||||
ctx->A=D;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef ripemd160_block_data_order
|
#ifndef ripemd160_block_data_order
|
||||||
#ifdef X
|
#ifdef X
|
||||||
#undef X
|
#undef X
|
||||||
|
@@ -72,32 +72,20 @@
|
|||||||
*/
|
*/
|
||||||
#ifdef RMD160_ASM
|
#ifdef RMD160_ASM
|
||||||
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
|
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
|
||||||
# if !defined(B_ENDIAN)
|
# define ripemd160_block_data_order ripemd160_block_asm_data_order
|
||||||
# define ripemd160_block_host_order ripemd160_block_asm_host_order
|
|
||||||
# endif
|
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void ripemd160_block_host_order (RIPEMD160_CTX *c, const void *p,size_t num);
|
|
||||||
void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num);
|
void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num);
|
||||||
|
|
||||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
|
|
||||||
# if !defined(B_ENDIAN)
|
|
||||||
# define ripemd160_block_data_order ripemd160_block_host_order
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define DATA_ORDER_IS_LITTLE_ENDIAN
|
#define DATA_ORDER_IS_LITTLE_ENDIAN
|
||||||
|
|
||||||
#define HASH_LONG RIPEMD160_LONG
|
#define HASH_LONG RIPEMD160_LONG
|
||||||
#define HASH_LONG_LOG2 RIPEMD160_LONG_LOG2
|
|
||||||
#define HASH_CTX RIPEMD160_CTX
|
#define HASH_CTX RIPEMD160_CTX
|
||||||
#define HASH_CBLOCK RIPEMD160_CBLOCK
|
#define HASH_CBLOCK RIPEMD160_CBLOCK
|
||||||
#define HASH_LBLOCK RIPEMD160_LBLOCK
|
|
||||||
#define HASH_UPDATE RIPEMD160_Update
|
#define HASH_UPDATE RIPEMD160_Update
|
||||||
#define HASH_TRANSFORM RIPEMD160_Transform
|
#define HASH_TRANSFORM RIPEMD160_Transform
|
||||||
#define HASH_FINAL RIPEMD160_Final
|
#define HASH_FINAL RIPEMD160_Final
|
||||||
#define HASH_BLOCK_HOST_ORDER ripemd160_block_host_order
|
|
||||||
#define HASH_MAKE_STRING(c,s) do { \
|
#define HASH_MAKE_STRING(c,s) do { \
|
||||||
unsigned long ll; \
|
unsigned long ll; \
|
||||||
ll=(c)->A; HOST_l2c(ll,(s)); \
|
ll=(c)->A; HOST_l2c(ll,(s)); \
|
||||||
@@ -106,9 +94,7 @@ void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num);
|
|||||||
ll=(c)->D; HOST_l2c(ll,(s)); \
|
ll=(c)->D; HOST_l2c(ll,(s)); \
|
||||||
ll=(c)->E; HOST_l2c(ll,(s)); \
|
ll=(c)->E; HOST_l2c(ll,(s)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#if !defined(L_ENDIAN) || defined(ripemd160_block_data_order)
|
|
||||||
#define HASH_BLOCK_DATA_ORDER ripemd160_block_data_order
|
#define HASH_BLOCK_DATA_ORDER ripemd160_block_data_order
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "md32_common.h"
|
#include "md32_common.h"
|
||||||
|
|
||||||
|
@@ -1,4 +1,16 @@
|
|||||||
#!/usr/local/bin/perl
|
#!/usr/bin/env perl
|
||||||
|
|
||||||
|
# ====================================================================
|
||||||
|
# [Re]written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||||
|
# project. The module is, however, dual licensed under OpenSSL and
|
||||||
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||||
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||||
|
# ====================================================================
|
||||||
|
|
||||||
|
# "[Re]written" was achieved in two major overhauls. In 2004 BODY_*
|
||||||
|
# functions were re-implemented to address P4 performance issue [see
|
||||||
|
# commentary below], and in 2006 the rest was rewritten in order to
|
||||||
|
# gain freedom to liberate licensing terms.
|
||||||
|
|
||||||
# It was noted that Intel IA-32 C compiler generates code which
|
# It was noted that Intel IA-32 C compiler generates code which
|
||||||
# performs ~30% *faster* on P4 CPU than original *hand-coded*
|
# performs ~30% *faster* on P4 CPU than original *hand-coded*
|
||||||
@@ -17,90 +29,27 @@
|
|||||||
# improvement on P4 outweights the loss and incorporate this
|
# improvement on P4 outweights the loss and incorporate this
|
||||||
# re-tuned code to 0.9.7 and later.
|
# re-tuned code to 0.9.7 and later.
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
# Those who for any particular reason absolutely must score on
|
|
||||||
# Pentium can replace this module with one from 0.9.6 distribution.
|
|
||||||
# This "offer" shall be revoked the moment programming interface to
|
|
||||||
# this module is changed, in which case this paragraph should be
|
|
||||||
# removed.
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# <appro@fy.chalmers.se>
|
# <appro@fy.chalmers.se>
|
||||||
|
|
||||||
$normal=0;
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||||
|
push(@INC,"${dir}","${dir}../../perlasm");
|
||||||
push(@INC,"perlasm","../../perlasm");
|
|
||||||
require "x86asm.pl";
|
require "x86asm.pl";
|
||||||
|
|
||||||
&asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386");
|
&asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386");
|
||||||
|
|
||||||
$A="eax";
|
$A="eax";
|
||||||
$B="ecx";
|
$B="ebx";
|
||||||
$C="ebx";
|
$C="ecx";
|
||||||
$D="edx";
|
$D="edx";
|
||||||
$E="edi";
|
$E="edi";
|
||||||
$T="esi";
|
$T="esi";
|
||||||
$tmp1="ebp";
|
$tmp1="ebp";
|
||||||
|
|
||||||
$off=9*4;
|
@V=($A,$B,$C,$D,$E,$T);
|
||||||
|
|
||||||
@K=(0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6);
|
|
||||||
|
|
||||||
&sha1_block_data("sha1_block_asm_data_order");
|
|
||||||
|
|
||||||
&asm_finish();
|
|
||||||
|
|
||||||
sub Nn
|
|
||||||
{
|
|
||||||
local($p)=@_;
|
|
||||||
local(%n)=($A,$T,$B,$A,$C,$B,$D,$C,$E,$D,$T,$E);
|
|
||||||
return($n{$p});
|
|
||||||
}
|
|
||||||
|
|
||||||
sub Np
|
|
||||||
{
|
|
||||||
local($p)=@_;
|
|
||||||
local(%n)=($A,$T,$B,$A,$C,$B,$D,$C,$E,$D,$T,$E);
|
|
||||||
local(%n)=($A,$B,$B,$C,$C,$D,$D,$E,$E,$T,$T,$A);
|
|
||||||
return($n{$p});
|
|
||||||
}
|
|
||||||
|
|
||||||
sub Na
|
|
||||||
{
|
|
||||||
local($n)=@_;
|
|
||||||
return( (($n )&0x0f),
|
|
||||||
(($n+ 2)&0x0f),
|
|
||||||
(($n+ 8)&0x0f),
|
|
||||||
(($n+13)&0x0f),
|
|
||||||
(($n+ 1)&0x0f));
|
|
||||||
}
|
|
||||||
|
|
||||||
sub X_expand
|
|
||||||
{
|
|
||||||
local($in)=@_;
|
|
||||||
|
|
||||||
&comment("First, load the words onto the stack in network byte order");
|
|
||||||
for ($i=0; $i<16; $i+=2)
|
|
||||||
{
|
|
||||||
&mov($A,&DWP(($i+0)*4,$in,"",0));# unless $i == 0;
|
|
||||||
&mov($B,&DWP(($i+1)*4,$in,"",0));
|
|
||||||
&bswap($A);
|
|
||||||
&bswap($B);
|
|
||||||
&mov(&swtmp($i+0),$A);
|
|
||||||
&mov(&swtmp($i+1),$B);
|
|
||||||
}
|
|
||||||
|
|
||||||
&comment("We now have the X array on the stack");
|
|
||||||
&comment("starting at sp-4");
|
|
||||||
}
|
|
||||||
|
|
||||||
# Rules of engagement
|
|
||||||
# F is always trashable at the start, the running total.
|
|
||||||
# E becomes the next F so it can be trashed after it has been 'accumulated'
|
|
||||||
# F becomes A in the next round. We don't need to access it much.
|
|
||||||
# During the X update part, the result ends up in $X[$n0].
|
|
||||||
|
|
||||||
sub BODY_00_15
|
sub BODY_00_15
|
||||||
{
|
{
|
||||||
local($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)=@_;
|
local($n,$a,$b,$c,$d,$e,$f)=@_;
|
||||||
|
|
||||||
&comment("00_15 $n");
|
&comment("00_15 $n");
|
||||||
|
|
||||||
@@ -109,37 +58,37 @@ sub BODY_00_15
|
|||||||
else { &mov($a,$tmp1); }
|
else { &mov($a,$tmp1); }
|
||||||
&rotl($tmp1,5); # tmp1=ROTATE(a,5)
|
&rotl($tmp1,5); # tmp1=ROTATE(a,5)
|
||||||
&xor($f,$d);
|
&xor($f,$d);
|
||||||
&and($f,$b);
|
|
||||||
&add($tmp1,$e); # tmp1+=e;
|
&add($tmp1,$e); # tmp1+=e;
|
||||||
&mov($e,&swtmp($n)); # e becomes volatile and
|
&and($f,$b);
|
||||||
# is loaded with xi
|
&mov($e,&swtmp($n%16)); # e becomes volatile and is loaded
|
||||||
|
# with xi, also note that e becomes
|
||||||
|
# f in next round...
|
||||||
&xor($f,$d); # f holds F_00_19(b,c,d)
|
&xor($f,$d); # f holds F_00_19(b,c,d)
|
||||||
&rotr($b,2); # b=ROTATE(b,30)
|
&rotr($b,2); # b=ROTATE(b,30)
|
||||||
&lea($tmp1,&DWP($K,$tmp1,$e,1));# tmp1+=K_00_19+xi
|
&lea($tmp1,&DWP(0x5a827999,$tmp1,$e)); # tmp1+=K_00_19+xi
|
||||||
|
|
||||||
if ($n==15) { &add($f,$tmp1); } # f+=tmp1
|
if ($n==15) { &add($f,$tmp1); } # f+=tmp1
|
||||||
else { &add($tmp1,$f); }
|
else { &add($tmp1,$f); } # f becomes a in next round
|
||||||
}
|
}
|
||||||
|
|
||||||
sub BODY_16_19
|
sub BODY_16_19
|
||||||
{
|
{
|
||||||
local($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)=@_;
|
local($n,$a,$b,$c,$d,$e,$f)=@_;
|
||||||
local($n0,$n1,$n2,$n3,$np)=&Na($n);
|
|
||||||
|
|
||||||
&comment("16_19 $n");
|
&comment("16_19 $n");
|
||||||
|
|
||||||
&mov($f,&swtmp($n1)); # f to hold Xupdate(xi,xa,xb,xc,xd)
|
&mov($f,&swtmp($n%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
|
||||||
&mov($tmp1,$c); # tmp1 to hold F_00_19(b,c,d)
|
&mov($tmp1,$c); # tmp1 to hold F_00_19(b,c,d)
|
||||||
&xor($f,&swtmp($n0));
|
&xor($f,&swtmp(($n+2)%16));
|
||||||
&xor($tmp1,$d);
|
&xor($tmp1,$d);
|
||||||
&xor($f,&swtmp($n2));
|
&xor($f,&swtmp(($n+8)%16));
|
||||||
&and($tmp1,$b); # tmp1 holds F_00_19(b,c,d)
|
&and($tmp1,$b); # tmp1 holds F_00_19(b,c,d)
|
||||||
&rotr($b,2); # b=ROTATE(b,30)
|
&rotr($b,2); # b=ROTATE(b,30)
|
||||||
&xor($f,&swtmp($n3)); # f holds xa^xb^xc^xd
|
&xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd
|
||||||
&rotl($f,1); # f=ROATE(f,1)
|
&rotl($f,1); # f=ROTATE(f,1)
|
||||||
&xor($tmp1,$d); # tmp1=F_00_19(b,c,d)
|
&xor($tmp1,$d); # tmp1=F_00_19(b,c,d)
|
||||||
&mov(&swtmp($n0),$f); # xi=f
|
&mov(&swtmp($n%16),$f); # xi=f
|
||||||
&lea($f,&DWP($K,$f,$e,1)); # f+=K_00_19+e
|
&lea($f,&DWP(0x5a827999,$f,$e));# f+=K_00_19+e
|
||||||
&mov($e,$a); # e becomes volatile
|
&mov($e,$a); # e becomes volatile
|
||||||
&rotl($e,5); # e=ROTATE(a,5)
|
&rotl($e,5); # e=ROTATE(a,5)
|
||||||
&add($f,$tmp1); # f+=F_00_19(b,c,d)
|
&add($f,$tmp1); # f+=F_00_19(b,c,d)
|
||||||
@@ -148,48 +97,47 @@ sub BODY_16_19
|
|||||||
|
|
||||||
sub BODY_20_39
|
sub BODY_20_39
|
||||||
{
|
{
|
||||||
local($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)=@_;
|
local($n,$a,$b,$c,$d,$e,$f)=@_;
|
||||||
|
local $K=($n<40)?0x6ed9eba1:0xca62c1d6;
|
||||||
|
|
||||||
&comment("20_39 $n");
|
&comment("20_39 $n");
|
||||||
local($n0,$n1,$n2,$n3,$np)=&Na($n);
|
|
||||||
|
|
||||||
&mov($tmp1,$b); # tmp1 to hold F_20_39(b,c,d)
|
&mov($tmp1,$b); # tmp1 to hold F_20_39(b,c,d)
|
||||||
&mov($f,&swtmp($n0)); # f to hold Xupdate(xi,xa,xb,xc,xd)
|
&mov($f,&swtmp($n%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
|
||||||
&rotr($b,2); # b=ROTATE(b,30)
|
&rotr($b,2); # b=ROTATE(b,30)
|
||||||
&xor($f,&swtmp($n1));
|
&xor($f,&swtmp(($n+2)%16));
|
||||||
&xor($tmp1,$c);
|
&xor($tmp1,$c);
|
||||||
&xor($f,&swtmp($n2));
|
&xor($f,&swtmp(($n+8)%16));
|
||||||
&xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d)
|
&xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d)
|
||||||
&xor($f,&swtmp($n3)); # f holds xa^xb^xc^xd
|
&xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd
|
||||||
&rotl($f,1); # f=ROTATE(f,1)
|
&rotl($f,1); # f=ROTATE(f,1)
|
||||||
&add($tmp1,$e);
|
&add($tmp1,$e);
|
||||||
&mov(&swtmp($n0),$f); # xi=f
|
&mov(&swtmp($n%16),$f); # xi=f
|
||||||
&mov($e,$a); # e becomes volatile
|
&mov($e,$a); # e becomes volatile
|
||||||
&rotl($e,5); # e=ROTATE(a,5)
|
&rotl($e,5); # e=ROTATE(a,5)
|
||||||
&lea($f,&DWP($K,$f,$tmp1,1)); # f+=K_20_39+e
|
&lea($f,&DWP($K,$f,$tmp1)); # f+=K_20_39+e
|
||||||
&add($f,$e); # f+=ROTATE(a,5)
|
&add($f,$e); # f+=ROTATE(a,5)
|
||||||
}
|
}
|
||||||
|
|
||||||
sub BODY_40_59
|
sub BODY_40_59
|
||||||
{
|
{
|
||||||
local($pos,$K,$X,$n,$a,$b,$c,$d,$e,$f)=@_;
|
local($n,$a,$b,$c,$d,$e,$f)=@_;
|
||||||
|
|
||||||
&comment("40_59 $n");
|
&comment("40_59 $n");
|
||||||
local($n0,$n1,$n2,$n3,$np)=&Na($n);
|
|
||||||
|
|
||||||
&mov($f,&swtmp($n0)); # f to hold Xupdate(xi,xa,xb,xc,xd)
|
&mov($f,&swtmp($n%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
|
||||||
&mov($tmp1,&swtmp($n1));
|
&mov($tmp1,&swtmp(($n+2)%16));
|
||||||
&xor($f,$tmp1);
|
&xor($f,$tmp1);
|
||||||
&mov($tmp1,&swtmp($n2));
|
&mov($tmp1,&swtmp(($n+8)%16));
|
||||||
&xor($f,$tmp1);
|
&xor($f,$tmp1);
|
||||||
&mov($tmp1,&swtmp($n3));
|
&mov($tmp1,&swtmp(($n+13)%16));
|
||||||
&xor($f,$tmp1); # f holds xa^xb^xc^xd
|
&xor($f,$tmp1); # f holds xa^xb^xc^xd
|
||||||
&mov($tmp1,$b); # tmp1 to hold F_40_59(b,c,d)
|
&mov($tmp1,$b); # tmp1 to hold F_40_59(b,c,d)
|
||||||
&rotl($f,1); # f=ROTATE(f,1)
|
&rotl($f,1); # f=ROTATE(f,1)
|
||||||
&or($tmp1,$c);
|
&or($tmp1,$c);
|
||||||
&mov(&swtmp($n0),$f); # xi=f
|
&mov(&swtmp($n%16),$f); # xi=f
|
||||||
&and($tmp1,$d);
|
&and($tmp1,$d);
|
||||||
&lea($f,&DWP($K,$f,$e,1)); # f+=K_40_59+e
|
&lea($f,&DWP(0x8f1bbcdc,$f,$e));# f+=K_40_59+e
|
||||||
&mov($e,$b); # e becomes volatile and is used
|
&mov($e,$b); # e becomes volatile and is used
|
||||||
# to calculate F_40_59(b,c,d)
|
# to calculate F_40_59(b,c,d)
|
||||||
&rotr($b,2); # b=ROTATE(b,30)
|
&rotr($b,2); # b=ROTATE(b,30)
|
||||||
@@ -201,230 +149,71 @@ sub BODY_40_59
|
|||||||
&add($f,$e); # f+=ROTATE(a,5)
|
&add($f,$e); # f+=ROTATE(a,5)
|
||||||
}
|
}
|
||||||
|
|
||||||
sub BODY_60_79
|
&function_begin("sha1_block_data_order",16);
|
||||||
|
&mov($tmp1,&wparam(0)); # SHA_CTX *c
|
||||||
|
&mov($T,&wparam(1)); # const void *input
|
||||||
|
&mov($A,&wparam(2)); # size_t num
|
||||||
|
&stack_push(16); # allocate X[16]
|
||||||
|
&shl($A,6);
|
||||||
|
&add($A,$T);
|
||||||
|
&mov(&wparam(2),$A); # pointer beyond the end of input
|
||||||
|
&mov($E,&DWP(16,$tmp1));# pre-load E
|
||||||
|
|
||||||
|
&set_label("loop",16);
|
||||||
|
|
||||||
|
# copy input chunk to X, but reversing byte order!
|
||||||
|
for ($i=0; $i<16; $i+=4)
|
||||||
{
|
{
|
||||||
&BODY_20_39(@_);
|
&mov($A,&DWP(4*($i+0),$T));
|
||||||
}
|
&mov($B,&DWP(4*($i+1),$T));
|
||||||
|
&mov($C,&DWP(4*($i+2),$T));
|
||||||
sub sha1_block_host
|
&mov($D,&DWP(4*($i+3),$T));
|
||||||
{
|
&bswap($A);
|
||||||
local($name, $sclabel)=@_;
|
&bswap($B);
|
||||||
|
&bswap($C);
|
||||||
&function_begin_B($name,"");
|
&bswap($D);
|
||||||
|
|
||||||
# parameter 1 is the MD5_CTX structure.
|
|
||||||
# A 0
|
|
||||||
# B 4
|
|
||||||
# C 8
|
|
||||||
# D 12
|
|
||||||
# E 16
|
|
||||||
|
|
||||||
&mov("ecx", &wparam(2));
|
|
||||||
&push("esi");
|
|
||||||
&shl("ecx",6);
|
|
||||||
&mov("esi", &wparam(1));
|
|
||||||
&push("ebp");
|
|
||||||
&add("ecx","esi"); # offset to leave on
|
|
||||||
&push("ebx");
|
|
||||||
&mov("ebp", &wparam(0));
|
|
||||||
&push("edi");
|
|
||||||
&mov($D, &DWP(12,"ebp","",0));
|
|
||||||
&stack_push(18+9);
|
|
||||||
&mov($E, &DWP(16,"ebp","",0));
|
|
||||||
&mov($C, &DWP( 8,"ebp","",0));
|
|
||||||
&mov(&swtmp(17),"ecx");
|
|
||||||
|
|
||||||
&comment("First we need to setup the X array");
|
|
||||||
|
|
||||||
for ($i=0; $i<16; $i+=2)
|
|
||||||
{
|
|
||||||
&mov($A,&DWP(($i+0)*4,"esi","",0));# unless $i == 0;
|
|
||||||
&mov($B,&DWP(($i+1)*4,"esi","",0));
|
|
||||||
&mov(&swtmp($i+0),$A);
|
&mov(&swtmp($i+0),$A);
|
||||||
&mov(&swtmp($i+1),$B);
|
&mov(&swtmp($i+1),$B);
|
||||||
|
&mov(&swtmp($i+2),$C);
|
||||||
|
&mov(&swtmp($i+3),$D);
|
||||||
}
|
}
|
||||||
&jmp($sclabel);
|
&mov(&wparam(1),$T); # redundant in 1st spin
|
||||||
&function_end_B($name);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
&mov($A,&DWP(0,$tmp1)); # load SHA_CTX
|
||||||
|
&mov($B,&DWP(4,$tmp1));
|
||||||
|
&mov($C,&DWP(8,$tmp1));
|
||||||
|
&mov($D,&DWP(12,$tmp1));
|
||||||
|
# E is pre-loaded
|
||||||
|
|
||||||
sub sha1_block_data
|
for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
|
||||||
{
|
for(;$i<20;$i++) { &BODY_16_19($i,@V); unshift(@V,pop(@V)); }
|
||||||
local($name)=@_;
|
for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
|
||||||
|
for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
|
||||||
|
for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
|
||||||
|
|
||||||
&function_begin_B($name,"");
|
(($V[5] eq $D) and ($V[0] eq $E)) or die; # double-check
|
||||||
|
|
||||||
# parameter 1 is the MD5_CTX structure.
|
&mov($tmp1,&wparam(0)); # re-load SHA_CTX*
|
||||||
# A 0
|
&mov($D,&wparam(1)); # D is last "T" and is discarded
|
||||||
# B 4
|
|
||||||
# C 8
|
|
||||||
# D 12
|
|
||||||
# E 16
|
|
||||||
|
|
||||||
&mov("ecx", &wparam(2));
|
&add($E,&DWP(0,$tmp1)); # E is last "A"...
|
||||||
&push("esi");
|
&add($T,&DWP(4,$tmp1));
|
||||||
&shl("ecx",6);
|
&add($A,&DWP(8,$tmp1));
|
||||||
&mov("esi", &wparam(1));
|
&add($B,&DWP(12,$tmp1));
|
||||||
&push("ebp");
|
&add($C,&DWP(16,$tmp1));
|
||||||
&add("ecx","esi"); # offset to leave on
|
|
||||||
&push("ebx");
|
|
||||||
&mov("ebp", &wparam(0));
|
|
||||||
&push("edi");
|
|
||||||
&mov($D, &DWP(12,"ebp","",0));
|
|
||||||
&stack_push(18+9);
|
|
||||||
&mov($E, &DWP(16,"ebp","",0));
|
|
||||||
&mov($C, &DWP( 8,"ebp","",0));
|
|
||||||
&mov(&swtmp(17),"ecx");
|
|
||||||
|
|
||||||
&comment("First we need to setup the X array");
|
&mov(&DWP(0,$tmp1),$E); # update SHA_CTX
|
||||||
|
&add($D,64); # advance input pointer
|
||||||
|
&mov(&DWP(4,$tmp1),$T);
|
||||||
|
&cmp($D,&wparam(2)); # have we reached the end yet?
|
||||||
|
&mov(&DWP(8,$tmp1),$A);
|
||||||
|
&mov($E,$C); # C is last "E" which needs to be "pre-loaded"
|
||||||
|
&mov(&DWP(12,$tmp1),$B);
|
||||||
|
&mov($T,$D); # input pointer
|
||||||
|
&mov(&DWP(16,$tmp1),$C);
|
||||||
|
&jb(&label("loop"));
|
||||||
|
|
||||||
&set_label("start") unless $normal;
|
&stack_pop(16);
|
||||||
|
&function_end("sha1_block_data_order");
|
||||||
&X_expand("esi");
|
|
||||||
&mov(&wparam(1),"esi");
|
|
||||||
|
|
||||||
&set_label("shortcut", 0, 1);
|
|
||||||
&comment("");
|
|
||||||
&comment("Start processing");
|
|
||||||
|
|
||||||
# odd start
|
|
||||||
&mov($A, &DWP( 0,"ebp","",0));
|
|
||||||
&mov($B, &DWP( 4,"ebp","",0));
|
|
||||||
$X="esp";
|
|
||||||
&BODY_00_15(-2,$K[0],$X, 0,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 1,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 2,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 3,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 4,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 5,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 6,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 7,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 8,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_00_15( 0,$K[0],$X, 9,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_00_15( 0,$K[0],$X,10,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_00_15( 0,$K[0],$X,11,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_00_15( 0,$K[0],$X,12,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_00_15( 0,$K[0],$X,13,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_00_15( 0,$K[0],$X,14,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_00_15( 1,$K[0],$X,15,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_16_19(-1,$K[0],$X,16,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_16_19( 0,$K[0],$X,17,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_16_19( 0,$K[0],$X,18,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_16_19( 1,$K[0],$X,19,$T,$A,$B,$C,$D,$E);
|
|
||||||
|
|
||||||
&BODY_20_39(-1,$K[1],$X,20,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,21,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,22,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,23,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,24,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,25,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,26,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,27,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,28,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,29,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,30,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,31,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,32,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,33,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,34,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,35,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,36,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,37,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39( 0,$K[1],$X,38,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39( 1,$K[1],$X,39,$D,$E,$T,$A,$B,$C);
|
|
||||||
|
|
||||||
&BODY_40_59(-1,$K[2],$X,40,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,41,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,42,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,43,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,44,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,45,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,46,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,47,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,48,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,49,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,50,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,51,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,52,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,53,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,54,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,55,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,56,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,57,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59( 0,$K[2],$X,58,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59( 1,$K[2],$X,59,$B,$C,$D,$E,$T,$A);
|
|
||||||
|
|
||||||
&BODY_60_79(-1,$K[3],$X,60,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,61,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,62,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,63,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,64,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,65,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,66,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,67,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,68,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,69,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,70,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,71,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,72,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,73,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,74,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,75,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,76,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,77,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79( 0,$K[3],$X,78,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79( 2,$K[3],$X,79,$T,$A,$B,$C,$D,$E);
|
|
||||||
|
|
||||||
&comment("End processing");
|
|
||||||
&comment("");
|
|
||||||
# D is the tmp value
|
|
||||||
|
|
||||||
# E -> A
|
|
||||||
# T -> B
|
|
||||||
# A -> C
|
|
||||||
# B -> D
|
|
||||||
# C -> E
|
|
||||||
# D -> T
|
|
||||||
|
|
||||||
&mov($tmp1,&wparam(0));
|
|
||||||
|
|
||||||
&mov($D, &DWP(12,$tmp1,"",0));
|
|
||||||
&add($D,$B);
|
|
||||||
&mov($B, &DWP( 4,$tmp1,"",0));
|
|
||||||
&add($B,$T);
|
|
||||||
&mov($T, $A);
|
|
||||||
&mov($A, &DWP( 0,$tmp1,"",0));
|
|
||||||
&mov(&DWP(12,$tmp1,"",0),$D);
|
|
||||||
|
|
||||||
&add($A,$E);
|
|
||||||
&mov($E, &DWP(16,$tmp1,"",0));
|
|
||||||
&add($E,$C);
|
|
||||||
&mov($C, &DWP( 8,$tmp1,"",0));
|
|
||||||
&add($C,$T);
|
|
||||||
|
|
||||||
&mov(&DWP( 0,$tmp1,"",0),$A);
|
|
||||||
&mov("esi",&wparam(1));
|
|
||||||
&mov(&DWP( 8,$tmp1,"",0),$C);
|
|
||||||
&add("esi",64);
|
|
||||||
&mov("eax",&swtmp(17));
|
|
||||||
&mov(&DWP(16,$tmp1,"",0),$E);
|
|
||||||
&cmp("esi","eax");
|
|
||||||
&mov(&DWP( 4,$tmp1,"",0),$B);
|
|
||||||
&jb(&label("start"));
|
|
||||||
|
|
||||||
&stack_pop(18+9);
|
|
||||||
&pop("edi");
|
|
||||||
&pop("ebx");
|
|
||||||
&pop("ebp");
|
|
||||||
&pop("esi");
|
|
||||||
&ret();
|
|
||||||
|
|
||||||
# keep a note of shortcut label so it can be used outside
|
|
||||||
# block.
|
|
||||||
my $sclabel = &label("shortcut");
|
|
||||||
|
|
||||||
&function_end_B($name);
|
|
||||||
# Putting this here avoids problems with MASM in debugging mode
|
|
||||||
&sha1_block_host("sha1_block_asm_host_order", $sclabel);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
&asm_finish();
|
||||||
|
@@ -2,8 +2,9 @@
|
|||||||
#
|
#
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||||
# project. Rights for redistribution and usage in source and binary
|
# project. The module is, however, dual licensed under OpenSSL and
|
||||||
# forms are granted according to the OpenSSL license.
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||||
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
#
|
#
|
||||||
# Eternal question is what's wrong with compiler generated code? The
|
# Eternal question is what's wrong with compiler generated code? The
|
||||||
@@ -11,15 +12,10 @@
|
|||||||
# to perform rotations by maintaining copy of 32-bit value in upper
|
# to perform rotations by maintaining copy of 32-bit value in upper
|
||||||
# bits of 64-bit register. Just follow mux2 and shrp instructions...
|
# bits of 64-bit register. Just follow mux2 and shrp instructions...
|
||||||
# Performance under big-endian OS such as HP-UX is 179MBps*1GHz, which
|
# Performance under big-endian OS such as HP-UX is 179MBps*1GHz, which
|
||||||
# is >50% better than HP C and >2x better than gcc. As of this moment
|
# is >50% better than HP C and >2x better than gcc.
|
||||||
# performance under little-endian OS such as Linux and Windows will be
|
|
||||||
# a bit lower, because data has to be picked in reverse byte-order.
|
|
||||||
# It's possible to resolve this issue by implementing third function,
|
|
||||||
# sha1_block_asm_data_order_aligned, which would temporarily flip
|
|
||||||
# BE field in User Mask register...
|
|
||||||
|
|
||||||
$code=<<___;
|
$code=<<___;
|
||||||
.ident \"sha1-ia64.s, version 1.0\"
|
.ident \"sha1-ia64.s, version 1.2\"
|
||||||
.ident \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
|
.ident \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
|
||||||
.explicit
|
.explicit
|
||||||
|
|
||||||
@@ -55,63 +51,55 @@ else {
|
|||||||
|
|
||||||
sub BODY_00_15 {
|
sub BODY_00_15 {
|
||||||
local *code=shift;
|
local *code=shift;
|
||||||
local ($i,$a,$b,$c,$d,$e,$f,$unaligned)=@_;
|
local ($i,$a,$b,$c,$d,$e,$f)=@_;
|
||||||
|
|
||||||
if ($unaligned) {
|
$code.=<<___ if ($i==0);
|
||||||
$code.=<<___;
|
{ .mmi; ld1 $X[$i&0xf]=[inp],2 // MSB
|
||||||
{ .mmi; ld1 tmp0=[inp],2 // MSB
|
ld1 tmp2=[tmp3],2 };;
|
||||||
ld1 tmp1=[tmp3],2 };;
|
{ .mmi; ld1 tmp0=[inp],2
|
||||||
{ .mmi; ld1 tmp2=[inp],2
|
ld1 tmp4=[tmp3],2 // LSB
|
||||||
ld1 $X[$i&0xf]=[tmp3],2 // LSB
|
dep $X[$i&0xf]=$X[$i&0xf],tmp2,8,8 };;
|
||||||
dep tmp1=tmp0,tmp1,8,8 };;
|
|
||||||
{ .mii; cmp.ne p16,p0=r0,r0 // no misaligned prefetch
|
|
||||||
dep $X[$i&0xf]=tmp2,$X[$i&0xf],8,8;;
|
|
||||||
dep $X[$i&0xf]=tmp1,$X[$i&0xf],16,16 };;
|
|
||||||
{ .mmi; nop.m 0
|
|
||||||
___
|
___
|
||||||
}
|
|
||||||
elsif ($i<15) {
|
|
||||||
$code.=<<___;
|
|
||||||
{ .mmi; ld4 $X[($i+1)&0xf]=[inp],4 // prefetch
|
|
||||||
___
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$code.=<<___;
|
|
||||||
{ .mmi; nop.m 0
|
|
||||||
___
|
|
||||||
}
|
|
||||||
if ($i<15) {
|
if ($i<15) {
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
and tmp0=$c,$b
|
{ .mmi; ld1 $X[($i+1)&0xf]=[inp],2 // +1
|
||||||
dep.z tmp5=$a,5,27 } // a<<5
|
dep tmp1=tmp0,tmp4,8,8 };;
|
||||||
|
{ .mmi; ld1 tmp2=[tmp3],2 // +1
|
||||||
|
and tmp4=$c,$b
|
||||||
|
dep $X[$i&0xf]=$X[$i&0xf],tmp1,16,16 } //;;
|
||||||
{ .mmi; andcm tmp1=$d,$b
|
{ .mmi; andcm tmp1=$d,$b
|
||||||
add tmp4=$e,$K_00_19 };;
|
add tmp0=$e,$K_00_19
|
||||||
{ .mmi; or tmp0=tmp0,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d)
|
dep.z tmp5=$a,5,27 };; // a<<5
|
||||||
add $f=tmp4,$X[$i&0xf] // f=xi+e+K_00_19
|
{ .mmi; or tmp4=tmp4,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d)
|
||||||
|
add $f=tmp0,$X[$i&0xf] // f=xi+e+K_00_19
|
||||||
extr.u tmp1=$a,27,5 };; // a>>27
|
extr.u tmp1=$a,27,5 };; // a>>27
|
||||||
{ .mib; add $f=$f,tmp0 // f+=F_00_19(b,c,d)
|
{ .mmi; ld1 tmp0=[inp],2 // +1
|
||||||
|
add $f=$f,tmp4 // f+=F_00_19(b,c,d)
|
||||||
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
|
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
|
||||||
{ .mib; or tmp1=tmp1,tmp5 // ROTATE(a,5)
|
{ .mmi; ld1 tmp4=[tmp3],2 // +1
|
||||||
|
or tmp5=tmp1,tmp5 // ROTATE(a,5)
|
||||||
mux2 tmp6=$a,0x44 };; // see b in next iteration
|
mux2 tmp6=$a,0x44 };; // see b in next iteration
|
||||||
{ .mii; add $f=$f,tmp1 // f+=ROTATE(a,5)
|
{ .mii; add $f=$f,tmp5 // f+=ROTATE(a,5)
|
||||||
mux2 $X[$i&0xf]=$X[$i&0xf],0x44
|
dep $X[($i+1)&0xf]=$X[($i+1)&0xf],tmp2,8,8 // +1
|
||||||
nop.i 0 };;
|
mux2 $X[$i&0xf]=$X[$i&0xf],0x44 } //;;
|
||||||
|
|
||||||
___
|
___
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
and tmp0=$c,$b
|
{ .mii; and tmp3=$c,$b
|
||||||
dep.z tmp5=$a,5,27 } // a<<5 ;;?
|
dep tmp1=tmp0,tmp4,8,8;;
|
||||||
|
dep $X[$i&0xf]=$X[$i&0xf],tmp1,16,16 } //;;
|
||||||
{ .mmi; andcm tmp1=$d,$b
|
{ .mmi; andcm tmp1=$d,$b
|
||||||
add tmp4=$e,$K_00_19 };;
|
add tmp0=$e,$K_00_19
|
||||||
{ .mmi; or tmp0=tmp0,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d)
|
dep.z tmp5=$a,5,27 };; // a<<5
|
||||||
add $f=tmp4,$X[$i&0xf] // f=xi+e+K_00_19
|
{ .mmi; or tmp4=tmp3,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d)
|
||||||
|
add $f=tmp0,$X[$i&0xf] // f=xi+e+K_00_19
|
||||||
extr.u tmp1=$a,27,5 } // a>>27
|
extr.u tmp1=$a,27,5 } // a>>27
|
||||||
{ .mmi; xor tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf] // +1
|
{ .mmi; xor tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf] // +1
|
||||||
xor tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1
|
xor tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1
|
||||||
nop.i 0 };;
|
nop.i 0 };;
|
||||||
{ .mmi; add $f=$f,tmp0 // f+=F_00_19(b,c,d)
|
{ .mmi; add $f=$f,tmp4 // f+=F_00_19(b,c,d)
|
||||||
xor tmp2=tmp2,tmp3 // +1
|
xor tmp2=tmp2,tmp3 // +1
|
||||||
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
|
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
|
||||||
{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
|
{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
|
||||||
@@ -190,9 +178,7 @@ $code.=<<___;
|
|||||||
extr.u tmp1=$a,27,5 } // a>>27
|
extr.u tmp1=$a,27,5 } // a>>27
|
||||||
{ .mib; add $f=$f,tmp4 // f+=e+K_20_39
|
{ .mib; add $f=$f,tmp4 // f+=e+K_20_39
|
||||||
add $h1=$h1,$a };; // wrap up
|
add $h1=$h1,$a };; // wrap up
|
||||||
{ .mmi;
|
{ .mmi; add $f=$f,tmp0 // f+=F_20_39(b,c,d)
|
||||||
(p16) ld4.s $X[0]=[inp],4 // non-faulting prefetch
|
|
||||||
add $f=$f,tmp0 // f+=F_20_39(b,c,d)
|
|
||||||
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) ;;?
|
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) ;;?
|
||||||
{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
|
{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
|
||||||
add $h3=$h3,$c };; // wrap up
|
add $h3=$h3,$c };; // wrap up
|
||||||
@@ -245,164 +231,11 @@ tmp3=r11;
|
|||||||
ctx=r32; // in0
|
ctx=r32; // in0
|
||||||
inp=r33; // in1
|
inp=r33; // in1
|
||||||
|
|
||||||
// void sha1_block_asm_host_order(SHA_CTX *c,const void *p,size_t num);
|
// void sha1_block_data_order(SHA_CTX *c,const void *p,size_t num);
|
||||||
.global sha1_block_asm_host_order#
|
.global sha1_block_data_order#
|
||||||
.proc sha1_block_asm_host_order#
|
.proc sha1_block_data_order#
|
||||||
.align 32
|
.align 32
|
||||||
sha1_block_asm_host_order:
|
sha1_block_data_order:
|
||||||
.prologue
|
|
||||||
{ .mmi; alloc tmp1=ar.pfs,3,15,0,0
|
|
||||||
$ADDP tmp0=4,ctx
|
|
||||||
.save ar.lc,r3
|
|
||||||
mov r3=ar.lc }
|
|
||||||
{ .mmi; $ADDP ctx=0,ctx
|
|
||||||
$ADDP inp=0,inp
|
|
||||||
mov r2=pr };;
|
|
||||||
tmp4=in2;
|
|
||||||
tmp5=loc13;
|
|
||||||
tmp6=loc14;
|
|
||||||
.body
|
|
||||||
{ .mlx; ld4 $h0=[ctx],8
|
|
||||||
movl $K_00_19=0x5a827999 }
|
|
||||||
{ .mlx; ld4 $h1=[tmp0],8
|
|
||||||
movl $K_20_39=0x6ed9eba1 };;
|
|
||||||
{ .mlx; ld4 $h2=[ctx],8
|
|
||||||
movl $K_40_59=0x8f1bbcdc }
|
|
||||||
{ .mlx; ld4 $h3=[tmp0]
|
|
||||||
movl $K_60_79=0xca62c1d6 };;
|
|
||||||
{ .mmi; ld4 $h4=[ctx],-16
|
|
||||||
add in2=-1,in2 // adjust num for ar.lc
|
|
||||||
mov ar.ec=1 };;
|
|
||||||
{ .mmi; ld4 $X[0]=[inp],4 // prefetch
|
|
||||||
cmp.ne p16,p0=r0,in2 // prefecth at loop end
|
|
||||||
mov ar.lc=in2 };; // brp.loop.imp: too far
|
|
||||||
|
|
||||||
.Lhtop:
|
|
||||||
{ .mmi; mov $A=$h0
|
|
||||||
mov $B=$h1
|
|
||||||
mux2 tmp6=$h1,0x44 }
|
|
||||||
{ .mmi; mov $C=$h2
|
|
||||||
mov $D=$h3
|
|
||||||
mov $E=$h4 };;
|
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
&BODY_00_15(\$code, 0,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_00_15(\$code, 1,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_00_15(\$code, 2,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_00_15(\$code, 3,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_00_15(\$code, 4,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_00_15(\$code, 5,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_00_15(\$code, 6,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_00_15(\$code, 7,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_00_15(\$code, 8,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_00_15(\$code, 9,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_00_15(\$code,10,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_00_15(\$code,11,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_00_15(\$code,12,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_00_15(\$code,13,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_00_15(\$code,14,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_00_15(\$code,15,$D,$E,$T,$A,$B,$C);
|
|
||||||
|
|
||||||
&BODY_16_19(\$code,16,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_16_19(\$code,17,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_16_19(\$code,18,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_16_19(\$code,19,$T,$A,$B,$C,$D,$E);
|
|
||||||
|
|
||||||
&BODY_20_39(\$code,20,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39(\$code,21,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_20_39(\$code,22,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39(\$code,23,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39(\$code,24,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39(\$code,25,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39(\$code,26,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39(\$code,27,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_20_39(\$code,28,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39(\$code,29,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39(\$code,30,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39(\$code,31,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39(\$code,32,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39(\$code,33,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_20_39(\$code,34,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39(\$code,35,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39(\$code,36,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39(\$code,37,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39(\$code,38,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39(\$code,39,$D,$E,$T,$A,$B,$C);
|
|
||||||
|
|
||||||
&BODY_40_59(\$code,40,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59(\$code,41,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59(\$code,42,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59(\$code,43,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59(\$code,44,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59(\$code,45,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59(\$code,46,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59(\$code,47,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59(\$code,48,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59(\$code,49,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59(\$code,50,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59(\$code,51,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59(\$code,52,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59(\$code,53,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59(\$code,54,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59(\$code,55,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59(\$code,56,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59(\$code,57,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59(\$code,58,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59(\$code,59,$B,$C,$D,$E,$T,$A);
|
|
||||||
|
|
||||||
&BODY_60_79(\$code,60,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79(\$code,61,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79(\$code,62,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79(\$code,63,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79(\$code,64,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79(\$code,65,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79(\$code,66,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79(\$code,67,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79(\$code,68,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79(\$code,69,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79(\$code,70,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79(\$code,71,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79(\$code,72,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79(\$code,73,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79(\$code,74,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79(\$code,75,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79(\$code,76,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79(\$code,77,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79(\$code,78,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79(\$code,79,$T,$A,$B,$C,$D,$E);
|
|
||||||
|
|
||||||
$code.=<<___;
|
|
||||||
{ .mmb; add $h0=$h0,$E
|
|
||||||
nop.m 0
|
|
||||||
br.ctop.dptk.many .Lhtop };;
|
|
||||||
.Lhend:
|
|
||||||
{ .mmi; add tmp0=4,ctx
|
|
||||||
mov ar.lc=r3 };;
|
|
||||||
{ .mmi; st4 [ctx]=$h0,8
|
|
||||||
st4 [tmp0]=$h1,8 };;
|
|
||||||
{ .mmi; st4 [ctx]=$h2,8
|
|
||||||
st4 [tmp0]=$h3 };;
|
|
||||||
{ .mib; st4 [ctx]=$h4,-16
|
|
||||||
mov pr=r2,0x1ffff
|
|
||||||
br.ret.sptk.many b0 };;
|
|
||||||
.endp sha1_block_asm_host_order#
|
|
||||||
___
|
|
||||||
|
|
||||||
|
|
||||||
$code.=<<___;
|
|
||||||
// void sha1_block_asm_data_order(SHA_CTX *c,const void *p,size_t num);
|
|
||||||
.global sha1_block_asm_data_order#
|
|
||||||
.proc sha1_block_asm_data_order#
|
|
||||||
.align 32
|
|
||||||
sha1_block_asm_data_order:
|
|
||||||
___
|
|
||||||
$code.=<<___ if ($big_endian);
|
|
||||||
{ .mmi; and r2=3,inp };;
|
|
||||||
{ .mib; cmp.eq p6,p0=r0,r2
|
|
||||||
(p6) br.dptk.many sha1_block_asm_host_order };;
|
|
||||||
___
|
|
||||||
$code.=<<___;
|
|
||||||
.prologue
|
.prologue
|
||||||
{ .mmi; alloc tmp1=ar.pfs,3,15,0,0
|
{ .mmi; alloc tmp1=ar.pfs,3,15,0,0
|
||||||
$ADDP tmp0=4,ctx
|
$ADDP tmp0=4,ctx
|
||||||
@@ -440,90 +273,16 @@ tmp6=loc14;
|
|||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
&BODY_00_15(\$code, 0,$A,$B,$C,$D,$E,$T,1);
|
{ my $i,@V=($A,$B,$C,$D,$E,$T);
|
||||||
&BODY_00_15(\$code, 1,$T,$A,$B,$C,$D,$E,1);
|
|
||||||
&BODY_00_15(\$code, 2,$E,$T,$A,$B,$C,$D,1);
|
|
||||||
&BODY_00_15(\$code, 3,$D,$E,$T,$A,$B,$C,1);
|
|
||||||
&BODY_00_15(\$code, 4,$C,$D,$E,$T,$A,$B,1);
|
|
||||||
&BODY_00_15(\$code, 5,$B,$C,$D,$E,$T,$A,1);
|
|
||||||
&BODY_00_15(\$code, 6,$A,$B,$C,$D,$E,$T,1);
|
|
||||||
&BODY_00_15(\$code, 7,$T,$A,$B,$C,$D,$E,1);
|
|
||||||
&BODY_00_15(\$code, 8,$E,$T,$A,$B,$C,$D,1);
|
|
||||||
&BODY_00_15(\$code, 9,$D,$E,$T,$A,$B,$C,1);
|
|
||||||
&BODY_00_15(\$code,10,$C,$D,$E,$T,$A,$B,1);
|
|
||||||
&BODY_00_15(\$code,11,$B,$C,$D,$E,$T,$A,1);
|
|
||||||
&BODY_00_15(\$code,12,$A,$B,$C,$D,$E,$T,1);
|
|
||||||
&BODY_00_15(\$code,13,$T,$A,$B,$C,$D,$E,1);
|
|
||||||
&BODY_00_15(\$code,14,$E,$T,$A,$B,$C,$D,1);
|
|
||||||
&BODY_00_15(\$code,15,$D,$E,$T,$A,$B,$C,1);
|
|
||||||
|
|
||||||
&BODY_16_19(\$code,16,$C,$D,$E,$T,$A,$B);
|
for($i=0;$i<16;$i++) { &BODY_00_15(\$code,$i,@V); unshift(@V,pop(@V)); }
|
||||||
&BODY_16_19(\$code,17,$B,$C,$D,$E,$T,$A);
|
for(;$i<20;$i++) { &BODY_16_19(\$code,$i,@V); unshift(@V,pop(@V)); }
|
||||||
&BODY_16_19(\$code,18,$A,$B,$C,$D,$E,$T);
|
for(;$i<40;$i++) { &BODY_20_39(\$code,$i,@V); unshift(@V,pop(@V)); }
|
||||||
&BODY_16_19(\$code,19,$T,$A,$B,$C,$D,$E);
|
for(;$i<60;$i++) { &BODY_40_59(\$code,$i,@V); unshift(@V,pop(@V)); }
|
||||||
|
for(;$i<80;$i++) { &BODY_60_79(\$code,$i,@V); unshift(@V,pop(@V)); }
|
||||||
|
|
||||||
&BODY_20_39(\$code,20,$E,$T,$A,$B,$C,$D);
|
(($V[5] eq $D) and ($V[0] eq $E)) or die; # double-check
|
||||||
&BODY_20_39(\$code,21,$D,$E,$T,$A,$B,$C);
|
}
|
||||||
&BODY_20_39(\$code,22,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39(\$code,23,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39(\$code,24,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39(\$code,25,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39(\$code,26,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39(\$code,27,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_20_39(\$code,28,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39(\$code,29,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39(\$code,30,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39(\$code,31,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39(\$code,32,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39(\$code,33,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_20_39(\$code,34,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_20_39(\$code,35,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_20_39(\$code,36,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_20_39(\$code,37,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_20_39(\$code,38,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_20_39(\$code,39,$D,$E,$T,$A,$B,$C);
|
|
||||||
|
|
||||||
&BODY_40_59(\$code,40,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59(\$code,41,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59(\$code,42,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59(\$code,43,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59(\$code,44,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59(\$code,45,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59(\$code,46,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59(\$code,47,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59(\$code,48,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59(\$code,49,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59(\$code,50,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59(\$code,51,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59(\$code,52,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59(\$code,53,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_40_59(\$code,54,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_40_59(\$code,55,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_40_59(\$code,56,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_40_59(\$code,57,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_40_59(\$code,58,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_40_59(\$code,59,$B,$C,$D,$E,$T,$A);
|
|
||||||
|
|
||||||
&BODY_60_79(\$code,60,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79(\$code,61,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79(\$code,62,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79(\$code,63,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79(\$code,64,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79(\$code,65,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79(\$code,66,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79(\$code,67,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79(\$code,68,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79(\$code,69,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79(\$code,70,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79(\$code,71,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79(\$code,72,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79(\$code,73,$T,$A,$B,$C,$D,$E);
|
|
||||||
&BODY_60_79(\$code,74,$E,$T,$A,$B,$C,$D);
|
|
||||||
&BODY_60_79(\$code,75,$D,$E,$T,$A,$B,$C);
|
|
||||||
&BODY_60_79(\$code,76,$C,$D,$E,$T,$A,$B);
|
|
||||||
&BODY_60_79(\$code,77,$B,$C,$D,$E,$T,$A);
|
|
||||||
&BODY_60_79(\$code,78,$A,$B,$C,$D,$E,$T);
|
|
||||||
&BODY_60_79(\$code,79,$T,$A,$B,$C,$D,$E);
|
|
||||||
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
{ .mmb; add $h0=$h0,$E
|
{ .mmb; add $h0=$h0,$E
|
||||||
@@ -539,7 +298,8 @@ $code.=<<___;
|
|||||||
{ .mib; st4 [ctx]=$h4,-16
|
{ .mib; st4 [ctx]=$h4,-16
|
||||||
mov pr=r2,0x1ffff
|
mov pr=r2,0x1ffff
|
||||||
br.ret.sptk.many b0 };;
|
br.ret.sptk.many b0 };;
|
||||||
.endp sha1_block_asm_data_order#
|
.endp sha1_block_data_order#
|
||||||
|
stringz "SHA1 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
|
||||||
___
|
___
|
||||||
|
|
||||||
print $code;
|
print $code;
|
||||||
|
@@ -2,8 +2,9 @@
|
|||||||
#
|
#
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||||
# project. Rights for redistribution and usage in source and binary
|
# project. The module is, however, dual licensed under OpenSSL and
|
||||||
# forms are granted according to the OpenSSL license.
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||||
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
#
|
#
|
||||||
# SHA256/512_Transform for Itanium.
|
# SHA256/512_Transform for Itanium.
|
||||||
@@ -71,7 +72,7 @@ if ($output =~ /512.*\.[s|asm]/) {
|
|||||||
$ADD="add";
|
$ADD="add";
|
||||||
$SHRU="shr.u";
|
$SHRU="shr.u";
|
||||||
$TABLE="K512";
|
$TABLE="K512";
|
||||||
$func="sha512_block";
|
$func="sha512_block_data_order";
|
||||||
@Sigma0=(28,34,39);
|
@Sigma0=(28,34,39);
|
||||||
@Sigma1=(14,18,41);
|
@Sigma1=(14,18,41);
|
||||||
@sigma0=(1, 8, 7);
|
@sigma0=(1, 8, 7);
|
||||||
@@ -85,7 +86,7 @@ if ($output =~ /512.*\.[s|asm]/) {
|
|||||||
$ADD="padd4";
|
$ADD="padd4";
|
||||||
$SHRU="pshr4.u";
|
$SHRU="pshr4.u";
|
||||||
$TABLE="K256";
|
$TABLE="K256";
|
||||||
$func="sha256_block";
|
$func="sha256_block_data_order";
|
||||||
@Sigma0=( 2,13,22);
|
@Sigma0=( 2,13,22);
|
||||||
@Sigma1=( 6,11,25);
|
@Sigma1=( 6,11,25);
|
||||||
@sigma0=( 7,18, 3);
|
@sigma0=( 7,18, 3);
|
||||||
@@ -105,11 +106,13 @@ if (!defined($big_endian))
|
|||||||
{ $big_endian=(unpack('L',pack('N',1))==1); }
|
{ $big_endian=(unpack('L',pack('N',1))==1); }
|
||||||
|
|
||||||
$code=<<___;
|
$code=<<___;
|
||||||
.ident \"$output, version 1.0\"
|
.ident \"$output, version 1.1\"
|
||||||
.ident \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
|
.ident \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
|
||||||
.explicit
|
.explicit
|
||||||
.text
|
.text
|
||||||
|
|
||||||
|
pfssave=r2;
|
||||||
|
lcsave=r3;
|
||||||
prsave=r14;
|
prsave=r14;
|
||||||
K=r15;
|
K=r15;
|
||||||
A=r16; B=r17; C=r18; D=r19;
|
A=r16; B=r17; C=r18; D=r19;
|
||||||
@@ -121,6 +124,8 @@ ctx=r31; // 1st arg
|
|||||||
input=r48; // 2nd arg
|
input=r48; // 2nd arg
|
||||||
num=r49; // 3rd arg
|
num=r49; // 3rd arg
|
||||||
sgm0=r50; sgm1=r51; // small constants
|
sgm0=r50; sgm1=r51; // small constants
|
||||||
|
A_=r54; B_=r55; C_=r56; D_=r57;
|
||||||
|
E_=r58; F_=r59; G_=r60; H_=r61;
|
||||||
|
|
||||||
// void $func (SHA_CTX *ctx, const void *in,size_t num[,int host])
|
// void $func (SHA_CTX *ctx, const void *in,size_t num[,int host])
|
||||||
.global $func#
|
.global $func#
|
||||||
@@ -128,81 +133,319 @@ sgm0=r50; sgm1=r51; // small constants
|
|||||||
.align 32
|
.align 32
|
||||||
$func:
|
$func:
|
||||||
.prologue
|
.prologue
|
||||||
.save ar.pfs,r2
|
.save ar.pfs,pfssave
|
||||||
{ .mmi; alloc r2=ar.pfs,3,17,0,16
|
{ .mmi; alloc pfssave=ar.pfs,3,27,0,16
|
||||||
$ADDP ctx=0,r32 // 1st arg
|
$ADDP ctx=0,r32 // 1st arg
|
||||||
.save ar.lc,r3
|
.save ar.lc,lcsave
|
||||||
mov r3=ar.lc }
|
mov lcsave=ar.lc }
|
||||||
{ .mmi; $ADDP input=0,r33 // 2nd arg
|
{ .mmi; $ADDP input=0,r33 // 2nd arg
|
||||||
addl Ktbl=\@ltoff($TABLE#),gp
|
mov num=r34 // 3rd arg
|
||||||
.save pr,prsave
|
.save pr,prsave
|
||||||
mov prsave=pr };;
|
mov prsave=pr };;
|
||||||
|
|
||||||
.body
|
.body
|
||||||
{ .mii; ld8 Ktbl=[Ktbl]
|
|
||||||
mov num=r34 };; // 3rd arg
|
|
||||||
|
|
||||||
{ .mib; add r8=0*$SZ,ctx
|
{ .mib; add r8=0*$SZ,ctx
|
||||||
add r9=1*$SZ,ctx
|
add r9=1*$SZ,ctx
|
||||||
brp.loop.imp .L_first16,.L_first16_ctop
|
brp.loop.imp .L_first16,.L_first16_end-16 }
|
||||||
}
|
|
||||||
{ .mib; add r10=2*$SZ,ctx
|
{ .mib; add r10=2*$SZ,ctx
|
||||||
add r11=3*$SZ,ctx
|
add r11=3*$SZ,ctx
|
||||||
brp.loop.imp .L_rest,.L_rest_ctop
|
brp.loop.imp .L_rest,.L_rest_end-16 };;
|
||||||
};;
|
|
||||||
// load A-H
|
|
||||||
{ .mmi; $LDW A=[r8],4*$SZ
|
|
||||||
$LDW B=[r9],4*$SZ
|
|
||||||
mov sgm0=$sigma0[2] }
|
|
||||||
{ .mmi; $LDW C=[r10],4*$SZ
|
|
||||||
$LDW D=[r11],4*$SZ
|
|
||||||
mov sgm1=$sigma1[2] };;
|
|
||||||
{ .mmi; $LDW E=[r8]
|
|
||||||
$LDW F=[r9] }
|
|
||||||
{ .mmi; $LDW G=[r10]
|
|
||||||
$LDW H=[r11]
|
|
||||||
cmp.ne p15,p14=0,r35 };; // used in sha256_block
|
|
||||||
|
|
||||||
|
// load A-H
|
||||||
|
.Lpic_point:
|
||||||
|
{ .mmi; $LDW A_=[r8],4*$SZ
|
||||||
|
$LDW B_=[r9],4*$SZ
|
||||||
|
mov Ktbl=ip }
|
||||||
|
{ .mmi; $LDW C_=[r10],4*$SZ
|
||||||
|
$LDW D_=[r11],4*$SZ
|
||||||
|
mov sgm0=$sigma0[2] };;
|
||||||
|
{ .mmi; $LDW E_=[r8]
|
||||||
|
$LDW F_=[r9]
|
||||||
|
add Ktbl=($TABLE#-.Lpic_point),Ktbl }
|
||||||
|
{ .mmi; $LDW G_=[r10]
|
||||||
|
$LDW H_=[r11]
|
||||||
|
cmp.ne p0,p16=0,r0 };; // used in sha256_block
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($BITS==64);
|
||||||
|
{ .mii; and r8=7,input
|
||||||
|
and input=~7,input;;
|
||||||
|
cmp.eq p9,p0=1,r8 }
|
||||||
|
{ .mmi; cmp.eq p10,p0=2,r8
|
||||||
|
cmp.eq p11,p0=3,r8
|
||||||
|
cmp.eq p12,p0=4,r8 }
|
||||||
|
{ .mmi; cmp.eq p13,p0=5,r8
|
||||||
|
cmp.eq p14,p0=6,r8
|
||||||
|
cmp.eq p15,p0=7,r8 };;
|
||||||
|
___
|
||||||
|
$code.=<<___;
|
||||||
.L_outer:
|
.L_outer:
|
||||||
{ .mii; mov ar.lc=15
|
|
||||||
mov ar.ec=1 };;
|
|
||||||
.align 32
|
|
||||||
.L_first16:
|
|
||||||
.rotr X[16]
|
.rotr X[16]
|
||||||
|
{ .mmi; mov A=A_
|
||||||
|
mov B=B_
|
||||||
|
mov ar.lc=14 }
|
||||||
|
{ .mmi; mov C=C_
|
||||||
|
mov D=D_
|
||||||
|
mov E=E_ }
|
||||||
|
{ .mmi; mov F=F_
|
||||||
|
mov G=G_
|
||||||
|
mov ar.ec=2 }
|
||||||
|
{ .mmi; ld1 X[15]=[input],$SZ // eliminated in 64-bit
|
||||||
|
mov H=H_
|
||||||
|
mov sgm1=$sigma1[2] };;
|
||||||
|
|
||||||
___
|
___
|
||||||
$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32);
|
$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32);
|
||||||
{ .mib; (p14) add r9=1,input
|
.align 32
|
||||||
(p14) add r10=2,input }
|
.L_first16:
|
||||||
{ .mib; (p14) add r11=3,input
|
{ .mmi; add r9=1-$SZ,input
|
||||||
(p15) br.dptk.few .L_host };;
|
add r10=2-$SZ,input
|
||||||
{ .mmi; (p14) ld1 r8=[input],$SZ
|
add r11=3-$SZ,input };;
|
||||||
(p14) ld1 r9=[r9] }
|
{ .mmi; ld1 r9=[r9]
|
||||||
{ .mmi; (p14) ld1 r10=[r10]
|
ld1 r10=[r10]
|
||||||
(p14) ld1 r11=[r11] };;
|
|
||||||
{ .mii; (p14) dep r9=r8,r9,8,8
|
|
||||||
(p14) dep r11=r10,r11,8,8 };;
|
|
||||||
{ .mib; (p14) dep X[15]=r9,r11,16,16 };;
|
|
||||||
.L_host:
|
|
||||||
{ .mib; (p15) $LDW X[15]=[input],$SZ // X[i]=*input++
|
|
||||||
dep.z $t1=E,32,32 }
|
dep.z $t1=E,32,32 }
|
||||||
{ .mib; $LDW K=[Ktbl],$SZ
|
{ .mmi; $LDW K=[Ktbl],$SZ
|
||||||
|
ld1 r11=[r11]
|
||||||
zxt4 E=E };;
|
zxt4 E=E };;
|
||||||
{ .mmi; or $t1=$t1,E
|
{ .mii; or $t1=$t1,E
|
||||||
and T1=F,E
|
dep X[15]=X[15],r9,8,8
|
||||||
and T2=A,B }
|
dep r11=r10,r11,8,8 };;
|
||||||
|
{ .mmi; and T1=F,E
|
||||||
|
and T2=A,B
|
||||||
|
dep X[15]=X[15],r11,16,16 }
|
||||||
{ .mmi; andcm r8=G,E
|
{ .mmi; andcm r8=G,E
|
||||||
and r9=A,C
|
and r9=A,C
|
||||||
mux2 $t0=A,0x44 };; // copy lower half to upper
|
mux2 $t0=A,0x44 };; // copy lower half to upper
|
||||||
{ .mib; xor T1=T1,r8 // T1=((e & f) ^ (~e & g))
|
{ .mmi; (p16) ld1 X[15-1]=[input],$SZ // prefetch
|
||||||
|
xor T1=T1,r8 // T1=((e & f) ^ (~e & g))
|
||||||
_rotr r11=$t1,$Sigma1[0] } // ROTR(e,14)
|
_rotr r11=$t1,$Sigma1[0] } // ROTR(e,14)
|
||||||
{ .mib; and r10=B,C
|
{ .mib; and r10=B,C
|
||||||
xor T2=T2,r9 };;
|
xor T2=T2,r9 };;
|
||||||
___
|
___
|
||||||
$t0="A", $t1="E", $code.=<<___ if ($BITS==64);
|
$t0="A", $t1="E", $code.=<<___ if ($BITS==64);
|
||||||
{ .mmi; $LDW X[15]=[input],$SZ // X[i]=*input++
|
// in 64-bit mode I load whole X[16] at once and take care of alignment...
|
||||||
|
{ .mmi; add r8=1*$SZ,input
|
||||||
|
add r9=2*$SZ,input
|
||||||
|
add r10=3*$SZ,input };;
|
||||||
|
{ .mmb; $LDW X[15]=[input],4*$SZ
|
||||||
|
$LDW X[14]=[r8],4*$SZ
|
||||||
|
(p9) br.cond.dpnt.many .L1byte };;
|
||||||
|
{ .mmb; $LDW X[13]=[r9],4*$SZ
|
||||||
|
$LDW X[12]=[r10],4*$SZ
|
||||||
|
(p10) br.cond.dpnt.many .L2byte };;
|
||||||
|
{ .mmb; $LDW X[11]=[input],4*$SZ
|
||||||
|
$LDW X[10]=[r8],4*$SZ
|
||||||
|
(p11) br.cond.dpnt.many .L3byte };;
|
||||||
|
{ .mmb; $LDW X[ 9]=[r9],4*$SZ
|
||||||
|
$LDW X[ 8]=[r10],4*$SZ
|
||||||
|
(p12) br.cond.dpnt.many .L4byte };;
|
||||||
|
{ .mmb; $LDW X[ 7]=[input],4*$SZ
|
||||||
|
$LDW X[ 6]=[r8],4*$SZ
|
||||||
|
(p13) br.cond.dpnt.many .L5byte };;
|
||||||
|
{ .mmb; $LDW X[ 5]=[r9],4*$SZ
|
||||||
|
$LDW X[ 4]=[r10],4*$SZ
|
||||||
|
(p14) br.cond.dpnt.many .L6byte };;
|
||||||
|
{ .mmb; $LDW X[ 3]=[input],4*$SZ
|
||||||
|
$LDW X[ 2]=[r8],4*$SZ
|
||||||
|
(p15) br.cond.dpnt.many .L7byte };;
|
||||||
|
{ .mmb; $LDW X[ 1]=[r9],4*$SZ
|
||||||
|
$LDW X[ 0]=[r10],4*$SZ
|
||||||
|
br.many .L_first16 };;
|
||||||
|
.L1byte:
|
||||||
|
{ .mmi; $LDW X[13]=[r9],4*$SZ
|
||||||
|
$LDW X[12]=[r10],4*$SZ
|
||||||
|
shrp X[15]=X[15],X[14],56 };;
|
||||||
|
{ .mmi; $LDW X[11]=[input],4*$SZ
|
||||||
|
$LDW X[10]=[r8],4*$SZ
|
||||||
|
shrp X[14]=X[14],X[13],56 }
|
||||||
|
{ .mmi; $LDW X[ 9]=[r9],4*$SZ
|
||||||
|
$LDW X[ 8]=[r10],4*$SZ
|
||||||
|
shrp X[13]=X[13],X[12],56 };;
|
||||||
|
{ .mmi; $LDW X[ 7]=[input],4*$SZ
|
||||||
|
$LDW X[ 6]=[r8],4*$SZ
|
||||||
|
shrp X[12]=X[12],X[11],56 }
|
||||||
|
{ .mmi; $LDW X[ 5]=[r9],4*$SZ
|
||||||
|
$LDW X[ 4]=[r10],4*$SZ
|
||||||
|
shrp X[11]=X[11],X[10],56 };;
|
||||||
|
{ .mmi; $LDW X[ 3]=[input],4*$SZ
|
||||||
|
$LDW X[ 2]=[r8],4*$SZ
|
||||||
|
shrp X[10]=X[10],X[ 9],56 }
|
||||||
|
{ .mmi; $LDW X[ 1]=[r9],4*$SZ
|
||||||
|
$LDW X[ 0]=[r10],4*$SZ
|
||||||
|
shrp X[ 9]=X[ 9],X[ 8],56 };;
|
||||||
|
{ .mii; $LDW T1=[input]
|
||||||
|
shrp X[ 8]=X[ 8],X[ 7],56
|
||||||
|
shrp X[ 7]=X[ 7],X[ 6],56 }
|
||||||
|
{ .mii; shrp X[ 6]=X[ 6],X[ 5],56
|
||||||
|
shrp X[ 5]=X[ 5],X[ 4],56 };;
|
||||||
|
{ .mii; shrp X[ 4]=X[ 4],X[ 3],56
|
||||||
|
shrp X[ 3]=X[ 3],X[ 2],56 }
|
||||||
|
{ .mii; shrp X[ 2]=X[ 2],X[ 1],56
|
||||||
|
shrp X[ 1]=X[ 1],X[ 0],56 }
|
||||||
|
{ .mib; shrp X[ 0]=X[ 0],T1,56
|
||||||
|
br.many .L_first16 };;
|
||||||
|
.L2byte:
|
||||||
|
{ .mmi; $LDW X[11]=[input],4*$SZ
|
||||||
|
$LDW X[10]=[r8],4*$SZ
|
||||||
|
shrp X[15]=X[15],X[14],48 }
|
||||||
|
{ .mmi; $LDW X[ 9]=[r9],4*$SZ
|
||||||
|
$LDW X[ 8]=[r10],4*$SZ
|
||||||
|
shrp X[14]=X[14],X[13],48 };;
|
||||||
|
{ .mmi; $LDW X[ 7]=[input],4*$SZ
|
||||||
|
$LDW X[ 6]=[r8],4*$SZ
|
||||||
|
shrp X[13]=X[13],X[12],48 }
|
||||||
|
{ .mmi; $LDW X[ 5]=[r9],4*$SZ
|
||||||
|
$LDW X[ 4]=[r10],4*$SZ
|
||||||
|
shrp X[12]=X[12],X[11],48 };;
|
||||||
|
{ .mmi; $LDW X[ 3]=[input],4*$SZ
|
||||||
|
$LDW X[ 2]=[r8],4*$SZ
|
||||||
|
shrp X[11]=X[11],X[10],48 }
|
||||||
|
{ .mmi; $LDW X[ 1]=[r9],4*$SZ
|
||||||
|
$LDW X[ 0]=[r10],4*$SZ
|
||||||
|
shrp X[10]=X[10],X[ 9],48 };;
|
||||||
|
{ .mii; $LDW T1=[input]
|
||||||
|
shrp X[ 9]=X[ 9],X[ 8],48
|
||||||
|
shrp X[ 8]=X[ 8],X[ 7],48 }
|
||||||
|
{ .mii; shrp X[ 7]=X[ 7],X[ 6],48
|
||||||
|
shrp X[ 6]=X[ 6],X[ 5],48 };;
|
||||||
|
{ .mii; shrp X[ 5]=X[ 5],X[ 4],48
|
||||||
|
shrp X[ 4]=X[ 4],X[ 3],48 }
|
||||||
|
{ .mii; shrp X[ 3]=X[ 3],X[ 2],48
|
||||||
|
shrp X[ 2]=X[ 2],X[ 1],48 }
|
||||||
|
{ .mii; shrp X[ 1]=X[ 1],X[ 0],48
|
||||||
|
shrp X[ 0]=X[ 0],T1,48 }
|
||||||
|
{ .mfb; br.many .L_first16 };;
|
||||||
|
.L3byte:
|
||||||
|
{ .mmi; $LDW X[ 9]=[r9],4*$SZ
|
||||||
|
$LDW X[ 8]=[r10],4*$SZ
|
||||||
|
shrp X[15]=X[15],X[14],40 };;
|
||||||
|
{ .mmi; $LDW X[ 7]=[input],4*$SZ
|
||||||
|
$LDW X[ 6]=[r8],4*$SZ
|
||||||
|
shrp X[14]=X[14],X[13],40 }
|
||||||
|
{ .mmi; $LDW X[ 5]=[r9],4*$SZ
|
||||||
|
$LDW X[ 4]=[r10],4*$SZ
|
||||||
|
shrp X[13]=X[13],X[12],40 };;
|
||||||
|
{ .mmi; $LDW X[ 3]=[input],4*$SZ
|
||||||
|
$LDW X[ 2]=[r8],4*$SZ
|
||||||
|
shrp X[12]=X[12],X[11],40 }
|
||||||
|
{ .mmi; $LDW X[ 1]=[r9],4*$SZ
|
||||||
|
$LDW X[ 0]=[r10],4*$SZ
|
||||||
|
shrp X[11]=X[11],X[10],40 };;
|
||||||
|
{ .mii; $LDW T1=[input]
|
||||||
|
shrp X[10]=X[10],X[ 9],40
|
||||||
|
shrp X[ 9]=X[ 9],X[ 8],40 }
|
||||||
|
{ .mii; shrp X[ 8]=X[ 8],X[ 7],40
|
||||||
|
shrp X[ 7]=X[ 7],X[ 6],40 };;
|
||||||
|
{ .mii; shrp X[ 6]=X[ 6],X[ 5],40
|
||||||
|
shrp X[ 5]=X[ 5],X[ 4],40 }
|
||||||
|
{ .mii; shrp X[ 4]=X[ 4],X[ 3],40
|
||||||
|
shrp X[ 3]=X[ 3],X[ 2],40 }
|
||||||
|
{ .mii; shrp X[ 2]=X[ 2],X[ 1],40
|
||||||
|
shrp X[ 1]=X[ 1],X[ 0],40 }
|
||||||
|
{ .mib; shrp X[ 0]=X[ 0],T1,40
|
||||||
|
br.many .L_first16 };;
|
||||||
|
.L4byte:
|
||||||
|
{ .mmi; $LDW X[ 7]=[input],4*$SZ
|
||||||
|
$LDW X[ 6]=[r8],4*$SZ
|
||||||
|
shrp X[15]=X[15],X[14],32 }
|
||||||
|
{ .mmi; $LDW X[ 5]=[r9],4*$SZ
|
||||||
|
$LDW X[ 4]=[r10],4*$SZ
|
||||||
|
shrp X[14]=X[14],X[13],32 };;
|
||||||
|
{ .mmi; $LDW X[ 3]=[input],4*$SZ
|
||||||
|
$LDW X[ 2]=[r8],4*$SZ
|
||||||
|
shrp X[13]=X[13],X[12],32 }
|
||||||
|
{ .mmi; $LDW X[ 1]=[r9],4*$SZ
|
||||||
|
$LDW X[ 0]=[r10],4*$SZ
|
||||||
|
shrp X[12]=X[12],X[11],32 };;
|
||||||
|
{ .mii; $LDW T1=[input]
|
||||||
|
shrp X[11]=X[11],X[10],32
|
||||||
|
shrp X[10]=X[10],X[ 9],32 }
|
||||||
|
{ .mii; shrp X[ 9]=X[ 9],X[ 8],32
|
||||||
|
shrp X[ 8]=X[ 8],X[ 7],32 };;
|
||||||
|
{ .mii; shrp X[ 7]=X[ 7],X[ 6],32
|
||||||
|
shrp X[ 6]=X[ 6],X[ 5],32 }
|
||||||
|
{ .mii; shrp X[ 5]=X[ 5],X[ 4],32
|
||||||
|
shrp X[ 4]=X[ 4],X[ 3],32 }
|
||||||
|
{ .mii; shrp X[ 3]=X[ 3],X[ 2],32
|
||||||
|
shrp X[ 2]=X[ 2],X[ 1],32 }
|
||||||
|
{ .mii; shrp X[ 1]=X[ 1],X[ 0],32
|
||||||
|
shrp X[ 0]=X[ 0],T1,32 }
|
||||||
|
{ .mfb; br.many .L_first16 };;
|
||||||
|
.L5byte:
|
||||||
|
{ .mmi; $LDW X[ 5]=[r9],4*$SZ
|
||||||
|
$LDW X[ 4]=[r10],4*$SZ
|
||||||
|
shrp X[15]=X[15],X[14],24 };;
|
||||||
|
{ .mmi; $LDW X[ 3]=[input],4*$SZ
|
||||||
|
$LDW X[ 2]=[r8],4*$SZ
|
||||||
|
shrp X[14]=X[14],X[13],24 }
|
||||||
|
{ .mmi; $LDW X[ 1]=[r9],4*$SZ
|
||||||
|
$LDW X[ 0]=[r10],4*$SZ
|
||||||
|
shrp X[13]=X[13],X[12],24 };;
|
||||||
|
{ .mii; $LDW T1=[input]
|
||||||
|
shrp X[12]=X[12],X[11],24
|
||||||
|
shrp X[11]=X[11],X[10],24 }
|
||||||
|
{ .mii; shrp X[10]=X[10],X[ 9],24
|
||||||
|
shrp X[ 9]=X[ 9],X[ 8],24 };;
|
||||||
|
{ .mii; shrp X[ 8]=X[ 8],X[ 7],24
|
||||||
|
shrp X[ 7]=X[ 7],X[ 6],24 }
|
||||||
|
{ .mii; shrp X[ 6]=X[ 6],X[ 5],24
|
||||||
|
shrp X[ 5]=X[ 5],X[ 4],24 }
|
||||||
|
{ .mii; shrp X[ 4]=X[ 4],X[ 3],24
|
||||||
|
shrp X[ 3]=X[ 3],X[ 2],24 }
|
||||||
|
{ .mii; shrp X[ 2]=X[ 2],X[ 1],24
|
||||||
|
shrp X[ 1]=X[ 1],X[ 0],24 }
|
||||||
|
{ .mib; shrp X[ 0]=X[ 0],T1,24
|
||||||
|
br.many .L_first16 };;
|
||||||
|
.L6byte:
|
||||||
|
{ .mmi; $LDW X[ 3]=[input],4*$SZ
|
||||||
|
$LDW X[ 2]=[r8],4*$SZ
|
||||||
|
shrp X[15]=X[15],X[14],16 }
|
||||||
|
{ .mmi; $LDW X[ 1]=[r9],4*$SZ
|
||||||
|
$LDW X[ 0]=[r10],4*$SZ
|
||||||
|
shrp X[14]=X[14],X[13],16 };;
|
||||||
|
{ .mii; $LDW T1=[input]
|
||||||
|
shrp X[13]=X[13],X[12],16
|
||||||
|
shrp X[12]=X[12],X[11],16 }
|
||||||
|
{ .mii; shrp X[11]=X[11],X[10],16
|
||||||
|
shrp X[10]=X[10],X[ 9],16 };;
|
||||||
|
{ .mii; shrp X[ 9]=X[ 9],X[ 8],16
|
||||||
|
shrp X[ 8]=X[ 8],X[ 7],16 }
|
||||||
|
{ .mii; shrp X[ 7]=X[ 7],X[ 6],16
|
||||||
|
shrp X[ 6]=X[ 6],X[ 5],16 }
|
||||||
|
{ .mii; shrp X[ 5]=X[ 5],X[ 4],16
|
||||||
|
shrp X[ 4]=X[ 4],X[ 3],16 }
|
||||||
|
{ .mii; shrp X[ 3]=X[ 3],X[ 2],16
|
||||||
|
shrp X[ 2]=X[ 2],X[ 1],16 }
|
||||||
|
{ .mii; shrp X[ 1]=X[ 1],X[ 0],16
|
||||||
|
shrp X[ 0]=X[ 0],T1,16 }
|
||||||
|
{ .mfb; br.many .L_first16 };;
|
||||||
|
.L7byte:
|
||||||
|
{ .mmi; $LDW X[ 1]=[r9],4*$SZ
|
||||||
|
$LDW X[ 0]=[r10],4*$SZ
|
||||||
|
shrp X[15]=X[15],X[14],8 };;
|
||||||
|
{ .mii; $LDW T1=[input]
|
||||||
|
shrp X[14]=X[14],X[13],8
|
||||||
|
shrp X[13]=X[13],X[12],8 }
|
||||||
|
{ .mii; shrp X[12]=X[12],X[11],8
|
||||||
|
shrp X[11]=X[11],X[10],8 };;
|
||||||
|
{ .mii; shrp X[10]=X[10],X[ 9],8
|
||||||
|
shrp X[ 9]=X[ 9],X[ 8],8 }
|
||||||
|
{ .mii; shrp X[ 8]=X[ 8],X[ 7],8
|
||||||
|
shrp X[ 7]=X[ 7],X[ 6],8 }
|
||||||
|
{ .mii; shrp X[ 6]=X[ 6],X[ 5],8
|
||||||
|
shrp X[ 5]=X[ 5],X[ 4],8 }
|
||||||
|
{ .mii; shrp X[ 4]=X[ 4],X[ 3],8
|
||||||
|
shrp X[ 3]=X[ 3],X[ 2],8 }
|
||||||
|
{ .mii; shrp X[ 2]=X[ 2],X[ 1],8
|
||||||
|
shrp X[ 1]=X[ 1],X[ 0],8 }
|
||||||
|
{ .mib; shrp X[ 0]=X[ 0],T1,8
|
||||||
|
br.many .L_first16 };;
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.L_first16:
|
||||||
|
{ .mmi; $LDW K=[Ktbl],$SZ
|
||||||
and T1=F,E
|
and T1=F,E
|
||||||
and T2=A,B }
|
and T2=A,B }
|
||||||
{ .mmi; $LDW K=[Ktbl],$SZ
|
{ .mmi; //$LDW X[15]=[input],$SZ // X[i]=*input++
|
||||||
andcm r8=G,E
|
andcm r8=G,E
|
||||||
and r9=A,C };;
|
and r9=A,C };;
|
||||||
{ .mmi; xor T1=T1,r8 //T1=((e & f) ^ (~e & g))
|
{ .mmi; xor T1=T1,r8 //T1=((e & f) ^ (~e & g))
|
||||||
@@ -235,13 +478,14 @@ $code.=<<___;
|
|||||||
{ .mmi; xor r10=r8,r10 // r10=Sigma0(a)
|
{ .mmi; xor r10=r8,r10 // r10=Sigma0(a)
|
||||||
mov B=A
|
mov B=A
|
||||||
add A=T1,T2 };;
|
add A=T1,T2 };;
|
||||||
.L_first16_ctop:
|
|
||||||
{ .mib; add E=E,T1
|
{ .mib; add E=E,T1
|
||||||
add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a)
|
add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a)
|
||||||
br.ctop.sptk .L_first16 };;
|
br.ctop.sptk .L_first16 };;
|
||||||
|
.L_first16_end:
|
||||||
|
|
||||||
|
{ .mii; mov ar.lc=$rounds-17
|
||||||
|
mov ar.ec=1 };;
|
||||||
|
|
||||||
{ .mib; mov ar.lc=$rounds-17 }
|
|
||||||
{ .mib; mov ar.ec=1 };;
|
|
||||||
.align 32
|
.align 32
|
||||||
.L_rest:
|
.L_rest:
|
||||||
.rotr X[16]
|
.rotr X[16]
|
||||||
@@ -310,46 +554,38 @@ $code.=<<___;
|
|||||||
{ .mmi; xor r10=r8,r10 // r10=Sigma0(a)
|
{ .mmi; xor r10=r8,r10 // r10=Sigma0(a)
|
||||||
mov B=A
|
mov B=A
|
||||||
add A=T1,T2 };;
|
add A=T1,T2 };;
|
||||||
.L_rest_ctop:
|
|
||||||
{ .mib; add E=E,T1
|
{ .mib; add E=E,T1
|
||||||
add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a)
|
add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a)
|
||||||
br.ctop.sptk .L_rest };;
|
br.ctop.sptk .L_rest };;
|
||||||
|
.L_rest_end:
|
||||||
|
|
||||||
|
{ .mmi; add A_=A_,A
|
||||||
|
add B_=B_,B
|
||||||
|
add C_=C_,C }
|
||||||
|
{ .mmi; add D_=D_,D
|
||||||
|
add E_=E_,E
|
||||||
|
cmp.ltu p16,p0=1,num };;
|
||||||
|
{ .mmi; add F_=F_,F
|
||||||
|
add G_=G_,G
|
||||||
|
add H_=H_,H }
|
||||||
|
{ .mmb; add Ktbl=-$SZ*$rounds,Ktbl
|
||||||
|
(p16) add num=-1,num
|
||||||
|
(p16) br.dptk.many .L_outer };;
|
||||||
|
|
||||||
{ .mib; add r8=0*$SZ,ctx
|
{ .mib; add r8=0*$SZ,ctx
|
||||||
add r9=1*$SZ,ctx }
|
add r9=1*$SZ,ctx }
|
||||||
{ .mib; add r10=2*$SZ,ctx
|
{ .mib; add r10=2*$SZ,ctx
|
||||||
add r11=3*$SZ,ctx };;
|
add r11=3*$SZ,ctx };;
|
||||||
{ .mmi; $LDW r32=[r8],4*$SZ
|
{ .mmi; $STW [r8]=A_,4*$SZ
|
||||||
$LDW r33=[r9],4*$SZ }
|
$STW [r9]=B_,4*$SZ
|
||||||
{ .mmi; $LDW r34=[r10],4*$SZ
|
mov ar.lc=lcsave }
|
||||||
$LDW r35=[r11],4*$SZ
|
{ .mmi; $STW [r10]=C_,4*$SZ
|
||||||
cmp.ltu p6,p7=1,num };;
|
$STW [r11]=D_,4*$SZ
|
||||||
{ .mmi; $LDW r36=[r8],-4*$SZ
|
mov pr=prsave,0x1ffff };;
|
||||||
$LDW r37=[r9],-4*$SZ
|
{ .mmb; $STW [r8]=E_
|
||||||
(p6) add Ktbl=-$SZ*$rounds,Ktbl }
|
$STW [r9]=F_ }
|
||||||
{ .mmi; $LDW r38=[r10],-4*$SZ
|
{ .mmb; $STW [r10]=G_
|
||||||
$LDW r39=[r11],-4*$SZ
|
$STW [r11]=H_
|
||||||
(p7) mov ar.lc=r3 };;
|
|
||||||
{ .mmi; add A=A,r32
|
|
||||||
add B=B,r33
|
|
||||||
add C=C,r34 }
|
|
||||||
{ .mmi; add D=D,r35
|
|
||||||
add E=E,r36
|
|
||||||
add F=F,r37 };;
|
|
||||||
{ .mmi; $STW [r8]=A,4*$SZ
|
|
||||||
$STW [r9]=B,4*$SZ
|
|
||||||
add G=G,r38 }
|
|
||||||
{ .mmi; $STW [r10]=C,4*$SZ
|
|
||||||
$STW [r11]=D,4*$SZ
|
|
||||||
add H=H,r39 };;
|
|
||||||
{ .mmi; $STW [r8]=E
|
|
||||||
$STW [r9]=F
|
|
||||||
(p6) add num=-1,num }
|
|
||||||
{ .mmb; $STW [r10]=G
|
|
||||||
$STW [r11]=H
|
|
||||||
(p6) br.dptk.many .L_outer };;
|
|
||||||
|
|
||||||
{ .mib; mov pr=prsave,0x1ffff
|
|
||||||
br.ret.sptk.many b0 };;
|
br.ret.sptk.many b0 };;
|
||||||
.endp $func#
|
.endp $func#
|
||||||
___
|
___
|
||||||
@@ -359,6 +595,9 @@ $code =~ s/_rotr(\s+)([^=]+)=([^,]+),([0-9]+)/shrp$1$2=$3,$3,$4/gm;
|
|||||||
if ($BITS==64) {
|
if ($BITS==64) {
|
||||||
$code =~ s/mux2(\s+)\S+/nop.i$1 0x0/gm;
|
$code =~ s/mux2(\s+)\S+/nop.i$1 0x0/gm;
|
||||||
$code =~ s/mux1(\s+)\S+/nop.i$1 0x0/gm if ($big_endian);
|
$code =~ s/mux1(\s+)\S+/nop.i$1 0x0/gm if ($big_endian);
|
||||||
|
$code =~ s/(shrp\s+X\[[^=]+)=([^,]+),([^,]+),([1-9]+)/$1=$3,$2,64-$4/gm
|
||||||
|
if (!$big_endian);
|
||||||
|
$code =~ s/ld1(\s+)X\[\S+/nop.m$1 0x0/gm;
|
||||||
}
|
}
|
||||||
|
|
||||||
print $code;
|
print $code;
|
||||||
@@ -383,6 +622,7 @@ K256: data4 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
|||||||
data4 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
data4 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||||
data4 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
data4 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||||
.size K256#,$SZ*$rounds
|
.size K256#,$SZ*$rounds
|
||||||
|
stringz "SHA256 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
|
||||||
___
|
___
|
||||||
print<<___ if ($BITS==64);
|
print<<___ if ($BITS==64);
|
||||||
.align 64
|
.align 64
|
||||||
@@ -428,4 +668,5 @@ K512: data8 0x428a2f98d728ae22,0x7137449123ef65cd
|
|||||||
data8 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
|
data8 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
|
||||||
data8 0x5fcb6fab3ad6faec,0x6c44198c4a475817
|
data8 0x5fcb6fab3ad6faec,0x6c44198c4a475817
|
||||||
.size K512#,$SZ*$rounds
|
.size K512#,$SZ*$rounds
|
||||||
|
stringz "SHA512 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
|
||||||
___
|
___
|
||||||
|
@@ -69,17 +69,11 @@ int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
|
|||||||
int SHA224_Final (unsigned char *md, SHA256_CTX *c)
|
int SHA224_Final (unsigned char *md, SHA256_CTX *c)
|
||||||
{ return SHA256_Final (md,c); }
|
{ return SHA256_Final (md,c); }
|
||||||
|
|
||||||
#ifndef SHA_LONG_LOG2
|
|
||||||
#define SHA_LONG_LOG2 2 /* default to 32 bits */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define DATA_ORDER_IS_BIG_ENDIAN
|
#define DATA_ORDER_IS_BIG_ENDIAN
|
||||||
|
|
||||||
#define HASH_LONG SHA_LONG
|
#define HASH_LONG SHA_LONG
|
||||||
#define HASH_LONG_LOG2 SHA_LONG_LOG2
|
|
||||||
#define HASH_CTX SHA256_CTX
|
#define HASH_CTX SHA256_CTX
|
||||||
#define HASH_CBLOCK SHA_CBLOCK
|
#define HASH_CBLOCK SHA_CBLOCK
|
||||||
#define HASH_LBLOCK SHA_LBLOCK
|
|
||||||
/*
|
/*
|
||||||
* Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
|
* Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
|
||||||
* default: case below covers for it. It's not clear however if it's
|
* default: case below covers for it. It's not clear however if it's
|
||||||
@@ -112,16 +106,15 @@ int SHA224_Final (unsigned char *md, SHA256_CTX *c)
|
|||||||
#define HASH_UPDATE SHA256_Update
|
#define HASH_UPDATE SHA256_Update
|
||||||
#define HASH_TRANSFORM SHA256_Transform
|
#define HASH_TRANSFORM SHA256_Transform
|
||||||
#define HASH_FINAL SHA256_Final
|
#define HASH_FINAL SHA256_Final
|
||||||
#define HASH_BLOCK_HOST_ORDER sha256_block_host_order
|
|
||||||
#define HASH_BLOCK_DATA_ORDER sha256_block_data_order
|
#define HASH_BLOCK_DATA_ORDER sha256_block_data_order
|
||||||
void sha256_block_host_order (SHA256_CTX *ctx, const void *in, size_t num);
|
#ifndef SHA256_ASM
|
||||||
|
static
|
||||||
|
#endif
|
||||||
void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);
|
void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);
|
||||||
|
|
||||||
#include "md32_common.h"
|
#include "md32_common.h"
|
||||||
|
|
||||||
#ifdef SHA256_ASM
|
#ifndef SHA256_ASM
|
||||||
void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host);
|
|
||||||
#else
|
|
||||||
static const SHA_LONG K256[64] = {
|
static const SHA_LONG K256[64] = {
|
||||||
0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL,
|
0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL,
|
||||||
0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL,
|
0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL,
|
||||||
@@ -155,10 +148,10 @@ static const SHA_LONG K256[64] = {
|
|||||||
|
|
||||||
#ifdef OPENSSL_SMALL_FOOTPRINT
|
#ifdef OPENSSL_SMALL_FOOTPRINT
|
||||||
|
|
||||||
static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
|
static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
|
||||||
{
|
{
|
||||||
unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
|
unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
|
||||||
SHA_LONG X[16];
|
SHA_LONG X[16],l;
|
||||||
int i;
|
int i;
|
||||||
const unsigned char *data=in;
|
const unsigned char *data=in;
|
||||||
|
|
||||||
@@ -167,25 +160,6 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
|
|||||||
a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
|
a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
|
||||||
e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
|
e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
|
||||||
|
|
||||||
if (host)
|
|
||||||
{
|
|
||||||
const SHA_LONG *W=(const SHA_LONG *)data;
|
|
||||||
|
|
||||||
for (i=0;i<16;i++)
|
|
||||||
{
|
|
||||||
T1 = X[i] = W[i];
|
|
||||||
T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
|
|
||||||
T2 = Sigma0(a) + Maj(a,b,c);
|
|
||||||
h = g; g = f; f = e; e = d + T1;
|
|
||||||
d = c; c = b; b = a; a = T1 + T2;
|
|
||||||
}
|
|
||||||
|
|
||||||
data += SHA256_CBLOCK;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SHA_LONG l;
|
|
||||||
|
|
||||||
for (i=0;i<16;i++)
|
for (i=0;i<16;i++)
|
||||||
{
|
{
|
||||||
HOST_c2l(data,l); T1 = X[i] = l;
|
HOST_c2l(data,l); T1 = X[i] = l;
|
||||||
@@ -194,7 +168,6 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
|
|||||||
h = g; g = f; f = e; e = d + T1;
|
h = g; g = f; f = e; e = d + T1;
|
||||||
d = c; c = b; b = a; a = T1 + T2;
|
d = c; c = b; b = a; a = T1 + T2;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
for (;i<64;i++)
|
for (;i<64;i++)
|
||||||
{
|
{
|
||||||
@@ -227,19 +200,20 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
|
|||||||
T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
|
T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
|
||||||
ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
|
ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
|
||||||
|
|
||||||
static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
|
static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
|
||||||
{
|
{
|
||||||
unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1;
|
unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1;
|
||||||
SHA_LONG X[16];
|
SHA_LONG X[16];
|
||||||
int i;
|
int i;
|
||||||
const unsigned char *data=in;
|
const unsigned char *data=in;
|
||||||
|
const union { long one; char little; } is_endian = {1};
|
||||||
|
|
||||||
while (num--) {
|
while (num--) {
|
||||||
|
|
||||||
a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
|
a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
|
||||||
e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
|
e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
|
||||||
|
|
||||||
if (host)
|
if (!is_endian.little && sizeof(SHA_LONG)==4 && ((size_t)in%4)==0)
|
||||||
{
|
{
|
||||||
const SHA_LONG *W=(const SHA_LONG *)data;
|
const SHA_LONG *W=(const SHA_LONG *)data;
|
||||||
|
|
||||||
@@ -305,15 +279,4 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
|
|||||||
#endif
|
#endif
|
||||||
#endif /* SHA256_ASM */
|
#endif /* SHA256_ASM */
|
||||||
|
|
||||||
/*
|
|
||||||
* Idea is to trade couple of cycles for some space. On IA-32 we save
|
|
||||||
* about 4K in "big footprint" case. In "small footprint" case any gain
|
|
||||||
* is appreciated:-)
|
|
||||||
*/
|
|
||||||
void HASH_BLOCK_HOST_ORDER (SHA256_CTX *ctx, const void *in, size_t num)
|
|
||||||
{ sha256_block (ctx,in,num,1); }
|
|
||||||
|
|
||||||
void HASH_BLOCK_DATA_ORDER (SHA256_CTX *ctx, const void *in, size_t num)
|
|
||||||
{ sha256_block (ctx,in,num,0); }
|
|
||||||
|
|
||||||
#endif /* OPENSSL_NO_SHA256 */
|
#endif /* OPENSSL_NO_SHA256 */
|
||||||
|
@@ -52,7 +52,10 @@
|
|||||||
|
|
||||||
const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
|
const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
|
||||||
|
|
||||||
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__i386) || defined(__x86_64)
|
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
||||||
|
defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
|
||||||
|
defined(__s390__) || defined(__s390x__) || \
|
||||||
|
defined(SHA512_ASM)
|
||||||
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
|
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -89,7 +92,7 @@ int SHA512_Init (SHA512_CTX *c)
|
|||||||
#ifndef SHA512_ASM
|
#ifndef SHA512_ASM
|
||||||
static
|
static
|
||||||
#endif
|
#endif
|
||||||
void sha512_block (SHA512_CTX *ctx, const void *in, size_t num);
|
void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
|
||||||
|
|
||||||
int SHA512_Final (unsigned char *md, SHA512_CTX *c)
|
int SHA512_Final (unsigned char *md, SHA512_CTX *c)
|
||||||
{
|
{
|
||||||
@@ -100,7 +103,7 @@ int SHA512_Final (unsigned char *md, SHA512_CTX *c)
|
|||||||
n++;
|
n++;
|
||||||
if (n > (sizeof(c->u)-16))
|
if (n > (sizeof(c->u)-16))
|
||||||
memset (p+n,0,sizeof(c->u)-n), n=0,
|
memset (p+n,0,sizeof(c->u)-n), n=0,
|
||||||
sha512_block (c,p,1);
|
sha512_block_data_order (c,p,1);
|
||||||
|
|
||||||
memset (p+n,0,sizeof(c->u)-16-n);
|
memset (p+n,0,sizeof(c->u)-16-n);
|
||||||
#ifdef B_ENDIAN
|
#ifdef B_ENDIAN
|
||||||
@@ -125,7 +128,7 @@ int SHA512_Final (unsigned char *md, SHA512_CTX *c)
|
|||||||
p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
|
p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
sha512_block (c,p,1);
|
sha512_block_data_order (c,p,1);
|
||||||
|
|
||||||
if (md==0) return 0;
|
if (md==0) return 0;
|
||||||
|
|
||||||
@@ -197,7 +200,7 @@ int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
|
|||||||
else {
|
else {
|
||||||
memcpy (p+c->num,data,n), c->num = 0;
|
memcpy (p+c->num,data,n), c->num = 0;
|
||||||
len-=n, data+=n;
|
len-=n, data+=n;
|
||||||
sha512_block (c,p,1);
|
sha512_block_data_order (c,p,1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -207,12 +210,12 @@ int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
|
|||||||
if ((size_t)data%sizeof(c->u.d[0]) != 0)
|
if ((size_t)data%sizeof(c->u.d[0]) != 0)
|
||||||
while (len >= sizeof(c->u))
|
while (len >= sizeof(c->u))
|
||||||
memcpy (p,data,sizeof(c->u)),
|
memcpy (p,data,sizeof(c->u)),
|
||||||
sha512_block (c,p,1),
|
sha512_block_data_order (c,p,1),
|
||||||
len -= sizeof(c->u),
|
len -= sizeof(c->u),
|
||||||
data += sizeof(c->u);
|
data += sizeof(c->u);
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
sha512_block (c,data,len/sizeof(c->u)),
|
sha512_block_data_order (c,data,len/sizeof(c->u)),
|
||||||
data += len,
|
data += len,
|
||||||
len %= sizeof(c->u),
|
len %= sizeof(c->u),
|
||||||
data -= len;
|
data -= len;
|
||||||
@@ -227,7 +230,7 @@ int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
|
|||||||
{ return SHA512_Update (c,data,len); }
|
{ return SHA512_Update (c,data,len); }
|
||||||
|
|
||||||
void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
|
void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
|
||||||
{ sha512_block (c,data,1); }
|
{ sha512_block_data_order (c,data,1); }
|
||||||
|
|
||||||
unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
|
unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
|
||||||
{
|
{
|
||||||
@@ -301,11 +304,70 @@ static const SHA_LONG64 K512[80] = {
|
|||||||
#ifndef PEDANTIC
|
#ifndef PEDANTIC
|
||||||
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
|
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
|
||||||
# if defined(__x86_64) || defined(__x86_64__)
|
# if defined(__x86_64) || defined(__x86_64__)
|
||||||
|
# define ROTR(a,n) ({ unsigned long ret; \
|
||||||
|
asm ("rorq %1,%0" \
|
||||||
|
: "=r"(ret) \
|
||||||
|
: "J"(n),"0"(a) \
|
||||||
|
: "cc"); ret; })
|
||||||
|
# if !defined(B_ENDIAN)
|
||||||
# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
|
# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
|
||||||
asm ("bswapq %0" \
|
asm ("bswapq %0" \
|
||||||
: "=r"(ret) \
|
: "=r"(ret) \
|
||||||
: "0"(ret)); ret; })
|
: "0"(ret)); ret; })
|
||||||
# endif
|
# endif
|
||||||
|
# elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
|
||||||
|
# if defined(I386_ONLY)
|
||||||
|
# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
|
||||||
|
unsigned int hi,lo; \
|
||||||
|
asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
|
||||||
|
"roll $16,%%eax; roll $16,%%edx; "\
|
||||||
|
"xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
|
||||||
|
: "=a"(lo),"=d"(hi) \
|
||||||
|
: "0"(p[1]),"1"(p[0]) : "cc"); \
|
||||||
|
((SHA_LONG64)hi)<<32|lo; })
|
||||||
|
# else
|
||||||
|
# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
|
||||||
|
unsigned int hi,lo; \
|
||||||
|
asm ("bswapl %0; bswapl %1;" \
|
||||||
|
: "=r"(lo),"=r"(hi) \
|
||||||
|
: "0"(p[1]),"1"(p[0])); \
|
||||||
|
((SHA_LONG64)hi)<<32|lo; })
|
||||||
|
# endif
|
||||||
|
# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
|
||||||
|
# define ROTR(a,n) ({ unsigned long ret; \
|
||||||
|
asm ("rotrdi %0,%1,%2" \
|
||||||
|
: "=r"(ret) \
|
||||||
|
: "r"(a),"K"(n)); ret; })
|
||||||
|
# endif
|
||||||
|
# elif defined(_MSC_VER)
|
||||||
|
# if defined(_WIN64) /* applies to both IA-64 and AMD64 */
|
||||||
|
# define ROTR(a,n) _rotr64((a),n)
|
||||||
|
# endif
|
||||||
|
# if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
|
||||||
|
# if defined(I386_ONLY)
|
||||||
|
static SHA_LONG64 __fastcall __pull64be(const void *x)
|
||||||
|
{ _asm mov edx, [ecx + 0]
|
||||||
|
_asm mov eax, [ecx + 4]
|
||||||
|
_asm xchg dh,dl
|
||||||
|
_asm xchg ah,al
|
||||||
|
_asm rol edx,16
|
||||||
|
_asm rol eax,16
|
||||||
|
_asm xchg dh,dl
|
||||||
|
_asm xchg ah,al
|
||||||
|
}
|
||||||
|
# else
|
||||||
|
static SHA_LONG64 __fastcall __pull64be(const void *x)
|
||||||
|
{ _asm mov edx, [ecx + 0]
|
||||||
|
_asm mov eax, [ecx + 4]
|
||||||
|
_asm bswap edx
|
||||||
|
_asm bswap eax
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
# define PULL64(x) __pull64be(&(x))
|
||||||
|
# if _MSC_VER<=1200
|
||||||
|
# pragma inline_depth(0)
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -314,27 +376,6 @@ static const SHA_LONG64 K512[80] = {
|
|||||||
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
|
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef PEDANTIC
|
|
||||||
# if defined(_MSC_VER)
|
|
||||||
# if defined(_WIN64) /* applies to both IA-64 and AMD64 */
|
|
||||||
# define ROTR(a,n) _rotr64((a),n)
|
|
||||||
# endif
|
|
||||||
# elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
|
|
||||||
# if defined(__x86_64) || defined(__x86_64__)
|
|
||||||
# define ROTR(a,n) ({ unsigned long ret; \
|
|
||||||
asm ("rorq %1,%0" \
|
|
||||||
: "=r"(ret) \
|
|
||||||
: "J"(n),"0"(a) \
|
|
||||||
: "cc"); ret; })
|
|
||||||
# elif defined(_ARCH_PPC) && defined(__64BIT__)
|
|
||||||
# define ROTR(a,n) ({ unsigned long ret; \
|
|
||||||
asm ("rotrdi %0,%1,%2" \
|
|
||||||
: "=r"(ret) \
|
|
||||||
: "r"(a),"K"(n)); ret; })
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef ROTR
|
#ifndef ROTR
|
||||||
#define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
|
#define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
|
||||||
#endif
|
#endif
|
||||||
@@ -357,7 +398,7 @@ static const SHA_LONG64 K512[80] = {
|
|||||||
|
|
||||||
#ifdef OPENSSL_SMALL_FOOTPRINT
|
#ifdef OPENSSL_SMALL_FOOTPRINT
|
||||||
|
|
||||||
static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
|
static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
|
||||||
{
|
{
|
||||||
const SHA_LONG64 *W=in;
|
const SHA_LONG64 *W=in;
|
||||||
SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2;
|
SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2;
|
||||||
@@ -418,7 +459,7 @@ static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
|
|||||||
T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
|
T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
|
||||||
ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
|
ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
|
||||||
|
|
||||||
static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
|
static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
|
||||||
{
|
{
|
||||||
const SHA_LONG64 *W=in;
|
const SHA_LONG64 *W=in;
|
||||||
SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1;
|
SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1;
|
||||||
|
@@ -62,17 +62,11 @@
|
|||||||
#include <openssl/opensslconf.h>
|
#include <openssl/opensslconf.h>
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
#ifndef SHA_LONG_LOG2
|
|
||||||
#define SHA_LONG_LOG2 2 /* default to 32 bits */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define DATA_ORDER_IS_BIG_ENDIAN
|
#define DATA_ORDER_IS_BIG_ENDIAN
|
||||||
|
|
||||||
#define HASH_LONG SHA_LONG
|
#define HASH_LONG SHA_LONG
|
||||||
#define HASH_LONG_LOG2 SHA_LONG_LOG2
|
|
||||||
#define HASH_CTX SHA_CTX
|
#define HASH_CTX SHA_CTX
|
||||||
#define HASH_CBLOCK SHA_CBLOCK
|
#define HASH_CBLOCK SHA_CBLOCK
|
||||||
#define HASH_LBLOCK SHA_LBLOCK
|
|
||||||
#define HASH_MAKE_STRING(c,s) do { \
|
#define HASH_MAKE_STRING(c,s) do { \
|
||||||
unsigned long ll; \
|
unsigned long ll; \
|
||||||
ll=(c)->h0; HOST_l2c(ll,(s)); \
|
ll=(c)->h0; HOST_l2c(ll,(s)); \
|
||||||
@@ -88,12 +82,10 @@
|
|||||||
# define HASH_TRANSFORM SHA_Transform
|
# define HASH_TRANSFORM SHA_Transform
|
||||||
# define HASH_FINAL SHA_Final
|
# define HASH_FINAL SHA_Final
|
||||||
# define HASH_INIT SHA_Init
|
# define HASH_INIT SHA_Init
|
||||||
# define HASH_BLOCK_HOST_ORDER sha_block_host_order
|
|
||||||
# define HASH_BLOCK_DATA_ORDER sha_block_data_order
|
# define HASH_BLOCK_DATA_ORDER sha_block_data_order
|
||||||
# define Xupdate(a,ix,ia,ib,ic,id) (ix=(a)=(ia^ib^ic^id))
|
# define Xupdate(a,ix,ia,ib,ic,id) (ix=(a)=(ia^ib^ic^id))
|
||||||
|
|
||||||
void sha_block_host_order (SHA_CTX *c, const void *p,size_t num);
|
static void sha_block_data_order (SHA_CTX *c, const void *p,size_t num);
|
||||||
void sha_block_data_order (SHA_CTX *c, const void *p,size_t num);
|
|
||||||
|
|
||||||
#elif defined(SHA_1)
|
#elif defined(SHA_1)
|
||||||
|
|
||||||
@@ -101,7 +93,6 @@
|
|||||||
# define HASH_TRANSFORM SHA1_Transform
|
# define HASH_TRANSFORM SHA1_Transform
|
||||||
# define HASH_FINAL SHA1_Final
|
# define HASH_FINAL SHA1_Final
|
||||||
# define HASH_INIT SHA1_Init
|
# define HASH_INIT SHA1_Init
|
||||||
# define HASH_BLOCK_HOST_ORDER sha1_block_host_order
|
|
||||||
# define HASH_BLOCK_DATA_ORDER sha1_block_data_order
|
# define HASH_BLOCK_DATA_ORDER sha1_block_data_order
|
||||||
# if defined(__MWERKS__) && defined(__MC68K__)
|
# if defined(__MWERKS__) && defined(__MC68K__)
|
||||||
/* Metrowerks for Motorola fails otherwise:-( <appro@fy.chalmers.se> */
|
/* Metrowerks for Motorola fails otherwise:-( <appro@fy.chalmers.se> */
|
||||||
@@ -114,24 +105,10 @@
|
|||||||
)
|
)
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# ifdef SHA1_ASM
|
#ifndef SHA1_ASM
|
||||||
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
|
static
|
||||||
# if !defined(B_ENDIAN)
|
#endif
|
||||||
# define sha1_block_host_order sha1_block_asm_host_order
|
void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num);
|
||||||
# define DONT_IMPLEMENT_BLOCK_HOST_ORDER
|
|
||||||
# define sha1_block_data_order sha1_block_asm_data_order
|
|
||||||
# define DONT_IMPLEMENT_BLOCK_DATA_ORDER
|
|
||||||
# define HASH_BLOCK_DATA_ORDER_ALIGNED sha1_block_asm_data_order
|
|
||||||
# endif
|
|
||||||
# elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
|
|
||||||
# define sha1_block_host_order sha1_block_asm_host_order
|
|
||||||
# define DONT_IMPLEMENT_BLOCK_HOST_ORDER
|
|
||||||
# define sha1_block_data_order sha1_block_asm_data_order
|
|
||||||
# define DONT_IMPLEMENT_BLOCK_DATA_ORDER
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
void sha1_block_host_order (SHA_CTX *c, const void *p,size_t num);
|
|
||||||
void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num);
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
# error "Either SHA_0 or SHA_1 must be defined."
|
# error "Either SHA_0 or SHA_1 must be defined."
|
||||||
@@ -229,133 +206,8 @@ int HASH_INIT (SHA_CTX *c)
|
|||||||
# define X(i) XX[i]
|
# define X(i) XX[i]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER
|
#if !defined(SHA_1) || !defined(SHA1_ASM)
|
||||||
void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, size_t num)
|
static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
|
||||||
{
|
|
||||||
const SHA_LONG *W=d;
|
|
||||||
register unsigned MD32_REG_T A,B,C,D,E,T;
|
|
||||||
#ifndef MD32_XARRAY
|
|
||||||
unsigned MD32_REG_T XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
|
|
||||||
XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
|
|
||||||
#else
|
|
||||||
SHA_LONG XX[16];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
A=c->h0;
|
|
||||||
B=c->h1;
|
|
||||||
C=c->h2;
|
|
||||||
D=c->h3;
|
|
||||||
E=c->h4;
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
BODY_00_15( 0,A,B,C,D,E,T,W[ 0]);
|
|
||||||
BODY_00_15( 1,T,A,B,C,D,E,W[ 1]);
|
|
||||||
BODY_00_15( 2,E,T,A,B,C,D,W[ 2]);
|
|
||||||
BODY_00_15( 3,D,E,T,A,B,C,W[ 3]);
|
|
||||||
BODY_00_15( 4,C,D,E,T,A,B,W[ 4]);
|
|
||||||
BODY_00_15( 5,B,C,D,E,T,A,W[ 5]);
|
|
||||||
BODY_00_15( 6,A,B,C,D,E,T,W[ 6]);
|
|
||||||
BODY_00_15( 7,T,A,B,C,D,E,W[ 7]);
|
|
||||||
BODY_00_15( 8,E,T,A,B,C,D,W[ 8]);
|
|
||||||
BODY_00_15( 9,D,E,T,A,B,C,W[ 9]);
|
|
||||||
BODY_00_15(10,C,D,E,T,A,B,W[10]);
|
|
||||||
BODY_00_15(11,B,C,D,E,T,A,W[11]);
|
|
||||||
BODY_00_15(12,A,B,C,D,E,T,W[12]);
|
|
||||||
BODY_00_15(13,T,A,B,C,D,E,W[13]);
|
|
||||||
BODY_00_15(14,E,T,A,B,C,D,W[14]);
|
|
||||||
BODY_00_15(15,D,E,T,A,B,C,W[15]);
|
|
||||||
|
|
||||||
BODY_16_19(16,C,D,E,T,A,B,X( 0),W[ 0],W[ 2],W[ 8],W[13]);
|
|
||||||
BODY_16_19(17,B,C,D,E,T,A,X( 1),W[ 1],W[ 3],W[ 9],W[14]);
|
|
||||||
BODY_16_19(18,A,B,C,D,E,T,X( 2),W[ 2],W[ 4],W[10],W[15]);
|
|
||||||
BODY_16_19(19,T,A,B,C,D,E,X( 3),W[ 3],W[ 5],W[11],X( 0));
|
|
||||||
|
|
||||||
BODY_20_31(20,E,T,A,B,C,D,X( 4),W[ 4],W[ 6],W[12],X( 1));
|
|
||||||
BODY_20_31(21,D,E,T,A,B,C,X( 5),W[ 5],W[ 7],W[13],X( 2));
|
|
||||||
BODY_20_31(22,C,D,E,T,A,B,X( 6),W[ 6],W[ 8],W[14],X( 3));
|
|
||||||
BODY_20_31(23,B,C,D,E,T,A,X( 7),W[ 7],W[ 9],W[15],X( 4));
|
|
||||||
BODY_20_31(24,A,B,C,D,E,T,X( 8),W[ 8],W[10],X( 0),X( 5));
|
|
||||||
BODY_20_31(25,T,A,B,C,D,E,X( 9),W[ 9],W[11],X( 1),X( 6));
|
|
||||||
BODY_20_31(26,E,T,A,B,C,D,X(10),W[10],W[12],X( 2),X( 7));
|
|
||||||
BODY_20_31(27,D,E,T,A,B,C,X(11),W[11],W[13],X( 3),X( 8));
|
|
||||||
BODY_20_31(28,C,D,E,T,A,B,X(12),W[12],W[14],X( 4),X( 9));
|
|
||||||
BODY_20_31(29,B,C,D,E,T,A,X(13),W[13],W[15],X( 5),X(10));
|
|
||||||
BODY_20_31(30,A,B,C,D,E,T,X(14),W[14],X( 0),X( 6),X(11));
|
|
||||||
BODY_20_31(31,T,A,B,C,D,E,X(15),W[15],X( 1),X( 7),X(12));
|
|
||||||
|
|
||||||
BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
|
|
||||||
BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
|
|
||||||
BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15));
|
|
||||||
BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0));
|
|
||||||
BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1));
|
|
||||||
BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2));
|
|
||||||
BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3));
|
|
||||||
BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4));
|
|
||||||
|
|
||||||
BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5));
|
|
||||||
BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6));
|
|
||||||
BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7));
|
|
||||||
BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8));
|
|
||||||
BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9));
|
|
||||||
BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10));
|
|
||||||
BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11));
|
|
||||||
BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12));
|
|
||||||
BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13));
|
|
||||||
BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14));
|
|
||||||
BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15));
|
|
||||||
BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0));
|
|
||||||
BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1));
|
|
||||||
BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2));
|
|
||||||
BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3));
|
|
||||||
BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4));
|
|
||||||
BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5));
|
|
||||||
BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6));
|
|
||||||
BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7));
|
|
||||||
BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8));
|
|
||||||
|
|
||||||
BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9));
|
|
||||||
BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10));
|
|
||||||
BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11));
|
|
||||||
BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12));
|
|
||||||
BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13));
|
|
||||||
BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14));
|
|
||||||
BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15));
|
|
||||||
BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0));
|
|
||||||
BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1));
|
|
||||||
BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2));
|
|
||||||
BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3));
|
|
||||||
BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4));
|
|
||||||
BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5));
|
|
||||||
BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6));
|
|
||||||
BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7));
|
|
||||||
BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8));
|
|
||||||
BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9));
|
|
||||||
BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10));
|
|
||||||
BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11));
|
|
||||||
BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12));
|
|
||||||
|
|
||||||
c->h0=(c->h0+E)&0xffffffffL;
|
|
||||||
c->h1=(c->h1+T)&0xffffffffL;
|
|
||||||
c->h2=(c->h2+A)&0xffffffffL;
|
|
||||||
c->h3=(c->h3+B)&0xffffffffL;
|
|
||||||
c->h4=(c->h4+C)&0xffffffffL;
|
|
||||||
|
|
||||||
if (--num == 0) break;
|
|
||||||
|
|
||||||
A=c->h0;
|
|
||||||
B=c->h1;
|
|
||||||
C=c->h2;
|
|
||||||
D=c->h3;
|
|
||||||
E=c->h4;
|
|
||||||
|
|
||||||
W+=SHA_LBLOCK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef DONT_IMPLEMENT_BLOCK_DATA_ORDER
|
|
||||||
void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
|
|
||||||
{
|
{
|
||||||
const unsigned char *data=p;
|
const unsigned char *data=p;
|
||||||
register unsigned MD32_REG_T A,B,C,D,E,T,l;
|
register unsigned MD32_REG_T A,B,C,D,E,T,l;
|
||||||
@@ -374,7 +226,34 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
|
|||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
const union { long one; char little; } is_endian = {1};
|
||||||
|
|
||||||
|
if (!is_endian.little && sizeof(SHA_LONG)==4 && ((size_t)p%4)==0)
|
||||||
|
{
|
||||||
|
const SHA_LONG *W=(const SHA_LONG *)data;
|
||||||
|
|
||||||
|
X( 0) = W[0]; X( 1) = W[ 1];
|
||||||
|
BODY_00_15( 0,A,B,C,D,E,T,X( 0)); X( 2) = W[ 2];
|
||||||
|
BODY_00_15( 1,T,A,B,C,D,E,X( 1)); X( 3) = W[ 3];
|
||||||
|
BODY_00_15( 2,E,T,A,B,C,D,X( 2)); X( 4) = W[ 4];
|
||||||
|
BODY_00_15( 3,D,E,T,A,B,C,X( 3)); X( 5) = W[ 5];
|
||||||
|
BODY_00_15( 4,C,D,E,T,A,B,X( 4)); X( 6) = W[ 6];
|
||||||
|
BODY_00_15( 5,B,C,D,E,T,A,X( 5)); X( 7) = W[ 7];
|
||||||
|
BODY_00_15( 6,A,B,C,D,E,T,X( 6)); X( 8) = W[ 8];
|
||||||
|
BODY_00_15( 7,T,A,B,C,D,E,X( 7)); X( 9) = W[ 9];
|
||||||
|
BODY_00_15( 8,E,T,A,B,C,D,X( 8)); X(10) = W[10];
|
||||||
|
BODY_00_15( 9,D,E,T,A,B,C,X( 9)); X(11) = W[11];
|
||||||
|
BODY_00_15(10,C,D,E,T,A,B,X(10)); X(12) = W[12];
|
||||||
|
BODY_00_15(11,B,C,D,E,T,A,X(11)); X(13) = W[13];
|
||||||
|
BODY_00_15(12,A,B,C,D,E,T,X(12)); X(14) = W[14];
|
||||||
|
BODY_00_15(13,T,A,B,C,D,E,X(13)); X(15) = W[15];
|
||||||
|
BODY_00_15(14,E,T,A,B,C,D,X(14));
|
||||||
|
BODY_00_15(15,D,E,T,A,B,C,X(15));
|
||||||
|
|
||||||
|
data += SHA_CBLOCK;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l;
|
HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l;
|
||||||
BODY_00_15( 0,A,B,C,D,E,T,X( 0)); HOST_c2l(data,l); X( 2)=l;
|
BODY_00_15( 0,A,B,C,D,E,T,X( 0)); HOST_c2l(data,l); X( 2)=l;
|
||||||
BODY_00_15( 1,T,A,B,C,D,E,X( 1)); HOST_c2l(data,l); X( 3)=l;
|
BODY_00_15( 1,T,A,B,C,D,E,X( 1)); HOST_c2l(data,l); X( 3)=l;
|
||||||
@@ -392,6 +271,7 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
|
|||||||
BODY_00_15(13,T,A,B,C,D,E,X(13)); HOST_c2l(data,l); X(15)=l;
|
BODY_00_15(13,T,A,B,C,D,E,X(13)); HOST_c2l(data,l); X(15)=l;
|
||||||
BODY_00_15(14,E,T,A,B,C,D,X(14));
|
BODY_00_15(14,E,T,A,B,C,D,X(14));
|
||||||
BODY_00_15(15,D,E,T,A,B,C,X(15));
|
BODY_00_15(15,D,E,T,A,B,C,X(15));
|
||||||
|
}
|
||||||
|
|
||||||
BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
|
BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
|
||||||
BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
|
BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
|
||||||
@@ -511,54 +391,8 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
|
|||||||
E=D, D=C, C=ROTATE(B,30), B=A; \
|
E=D, D=C, C=ROTATE(B,30), B=A; \
|
||||||
A=ROTATE(A,5)+T+xa; } while(0)
|
A=ROTATE(A,5)+T+xa; } while(0)
|
||||||
|
|
||||||
#ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER
|
#if !defined(SHA_1) || !defined(SHA1_ASM)
|
||||||
void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, size_t num)
|
static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
|
||||||
{
|
|
||||||
const SHA_LONG *W=d;
|
|
||||||
register unsigned MD32_REG_T A,B,C,D,E,T;
|
|
||||||
int i;
|
|
||||||
SHA_LONG X[16];
|
|
||||||
|
|
||||||
A=c->h0;
|
|
||||||
B=c->h1;
|
|
||||||
C=c->h2;
|
|
||||||
D=c->h3;
|
|
||||||
E=c->h4;
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
for (i=0;i<16;i++)
|
|
||||||
{ X[i]=W[i]; BODY_00_15(X[i]); }
|
|
||||||
for (i=0;i<4;i++)
|
|
||||||
{ BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); }
|
|
||||||
for (;i<24;i++)
|
|
||||||
{ BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
|
|
||||||
for (i=0;i<20;i++)
|
|
||||||
{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
|
|
||||||
for (i=4;i<24;i++)
|
|
||||||
{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
|
|
||||||
|
|
||||||
c->h0=(c->h0+A)&0xffffffffL;
|
|
||||||
c->h1=(c->h1+B)&0xffffffffL;
|
|
||||||
c->h2=(c->h2+C)&0xffffffffL;
|
|
||||||
c->h3=(c->h3+D)&0xffffffffL;
|
|
||||||
c->h4=(c->h4+E)&0xffffffffL;
|
|
||||||
|
|
||||||
if (--num == 0) break;
|
|
||||||
|
|
||||||
A=c->h0;
|
|
||||||
B=c->h1;
|
|
||||||
C=c->h2;
|
|
||||||
D=c->h3;
|
|
||||||
E=c->h4;
|
|
||||||
|
|
||||||
W+=SHA_LBLOCK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef DONT_IMPLEMENT_BLOCK_DATA_ORDER
|
|
||||||
void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
|
|
||||||
{
|
{
|
||||||
const unsigned char *data=p;
|
const unsigned char *data=p;
|
||||||
register unsigned MD32_REG_T A,B,C,D,E,T,l;
|
register unsigned MD32_REG_T A,B,C,D,E,T,l;
|
||||||
|
Reference in New Issue
Block a user