Squeeze a bit more speed out of MD5 assembler.

This commit is contained in:
Ben Laurie 1999-01-30 17:53:00 +00:00
parent 679ab7c39e
commit a96e7810e2
4 changed files with 23 additions and 748 deletions

View File

@ -5,6 +5,10 @@
Changes between 0.9.1c and 0.9.2
*) Squeeze another 7% of speed out of MD5 assembler, at least on a P2. I'd
like to hear about it if this slows down other processors.
[Ben Laurie]
*) Add CygWin32 platform information to Configure script.
[Alan Batie <batie@aahz.jf.intel.com>]

View File

@ -61,7 +61,7 @@ asm/mx86-out.o: asm/mx86unix.cpp
asm/mx86bsdi.o: asm/mx86unix.cpp
$(CPP) -DBSDI asm/mx86unix.cpp | sed 's/ :/:/' | as -o asm/mx86bsdi.o
asm/mx86unix.cpp:
asm/mx86unix.cpp: asm/md5-586.pl
(cd asm; perl md5-586.pl cpp >mx86unix.cpp)
files:

View File

@ -54,7 +54,6 @@ sub R0
&and($tmp1,$b); # F function - part 3
&lea($a,&DWP($t,$a,$tmp2,1));
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
&xor($tmp1,$d); # F function - part 4
&add($a,$tmp1);
@ -62,8 +61,10 @@ sub R0
&mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1
&rotl($a,$s);
&add($a,$b);
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
&add($a,$b);
}
sub R1
@ -106,10 +107,10 @@ if (($n & 1) == 0)
&lea($a,&DWP($t,$a,$tmp2,1));
&add($a,$tmp1);
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0));
&rotl($a,$s);
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0));
&mov($tmp1,&Np($c));
}
else

View File

@ -1,730 +0,0 @@
/* Run the C pre-processor over this file with exactly one of the following
 * macros defined on the command line (e.g. -DELF):
 *   ELF  - elf object files,
 *   OUT  - a.out object files,
 *   BSDI - BSDI style a.out object files
 *   SOL  - Solaris style elf
 * The macro selects the symbol naming, the alignment operand and whether
 * .type/.size directives are emitted for the code that follows.
 */
/* Default: TYPE()/SIZE() expand to the assembler's .type/.size directives. */
#define TYPE(a,b) .type a,b
#define SIZE(a,b) .size a,b
/* Both a.out flavours export the routine with a leading underscore. */
#if defined(OUT) || defined(BSDI)
#define md5_block_x86 _md5_block_x86
#endif
/* NOTE(review): ALIGN is 4 for a.out and 16 for ELF; presumably the a.out
 * assemblers read the .align operand as a power of two -- confirm. */
#ifdef OUT
#define OK 1
#define ALIGN 4
#endif
/* BSDI: as OUT, but TYPE()/SIZE() are redefined to expand to nothing, so no
 * .type/.size directives reach the assembler. */
#ifdef BSDI
#define OK 1
#define ALIGN 4
#undef SIZE
#undef TYPE
#define SIZE(a,b)
#define TYPE(a,b)
#endif
#if defined(ELF) || defined(SOL)
#define OK 1
#define ALIGN 16
#endif
/* OK is only set by one of the target blocks above.  When none was selected
 * the plain text below is passed through by the pre-processor unchanged;
 * presumably this is meant to make the assembler stop with an error. */
#ifndef OK
You need to define one of
ELF - elf systems - linux-elf, NetBSD and DG-UX
OUT - a.out systems - linux-a.out and FreeBSD
SOL - solaris systems, which are elf with strange comment lines
BSDI - a.out with a very primative version of as.
#endif
/* Let the Assembler begin :-) */
/* Don't even think of reading this code */
/* It was automatically generated by md5-586.pl */
/* Which is a perl program used to generate the x86 assembler for */
/* any of elf, a.out, BSDI, Win32, or Solaris */
/* eric <eay@cryptsoft.com> */
/*
 * md5_block_x86 -- MD5 block transform for i386 (AT&T syntax, run through cpp).
 *
 * Arguments are read from the stack (offsets below are as seen on entry,
 * i.e. before any of the pushes in the prologue):
 *    4(%esp)  pointer to four consecutive 32-bit state words A, B, C, D;
 *             they are read at the start and updated in place after every
 *             64-byte block.
 *    8(%esp)  pointer to the input data; words are fetched with plain
 *             32-bit loads at offsets 0..60 from this pointer.
 *   12(%esp)  length in bytes.  The address of the last block to process
 *             is computed as data + length - 64.
 *
 * The first block is processed unconditionally (the loop test is at the
 * bottom), so the caller must supply at least 64 bytes.
 *
 * Register roles inside the loop:
 *   %eax,%ebx,%ecx,%edx  working copies of A, B, C, D
 *   %edi                 scratch for the round function
 *   %ebp                 message word for the current/next step
 *   %esi                 pointer to the current 64-byte block
 *   (%esp)               saved address of the last block (loop limit)
 *
 * %ebx, %ebp, %esi and %edi are saved and restored.  %eax, %ecx and %edx
 * are overwritten; %eax ends up holding the popped loop limit, and no
 * return value is set up.
 *
 * Change relative to the generated original: the loop-continuation branch
 * uses the unsigned condition (jae) instead of the signed one (jge),
 * because the two operands are addresses.
 */
	.file "md5-586.s"
	.version "01.01"
gcc2_compiled.:
.text
	.align ALIGN
.globl md5_block_x86
	TYPE(md5_block_x86,@function)
md5_block_x86:
	pushl %esi
	pushl %edi
	/* Two registers pushed so far: args are at 12/16/20(%esp). */
	movl 12(%esp), %edi
	movl 16(%esp), %esi
	movl 20(%esp), %ecx
	pushl %ebp
	pushl %ebx
	/* %ecx = data + length - 64 = address of the last block. */
	addl %esi, %ecx
	subl $64, %ecx
	movl (%edi), %eax
	/* Keep the loop limit at (%esp) for the duration of the loop. */
	pushl %ecx
	movl 4(%edi), %ebx
	movl 8(%edi), %ecx
	movl 12(%edi), %edx
.L000start:
	/* R0 section */
	/* Each step computes ((c ^ d) & b) ^ d in %edi, adds it together */
	/* with the message word and the step constant, rotates, adds b. */
	movl %ecx, %edi
	movl (%esi), %ebp
	/* R0 0 */
	xorl %edx, %edi
	andl %ebx, %edi
	leal 3614090360(%eax,%ebp,1),%eax
	movl 4(%esi), %ebp
	xorl %edx, %edi
	addl %edi, %eax
	movl %ebx, %edi
	roll $7, %eax
	addl %ebx, %eax
	/* R0 1 */
	xorl %ecx, %edi
	andl %eax, %edi
	leal 3905402710(%edx,%ebp,1),%edx
	movl 8(%esi), %ebp
	xorl %ecx, %edi
	addl %edi, %edx
	movl %eax, %edi
	roll $12, %edx
	addl %eax, %edx
	/* R0 2 */
	xorl %ebx, %edi
	andl %edx, %edi
	leal 606105819(%ecx,%ebp,1),%ecx
	movl 12(%esi), %ebp
	xorl %ebx, %edi
	addl %edi, %ecx
	movl %edx, %edi
	roll $17, %ecx
	addl %edx, %ecx
	/* R0 3 */
	xorl %eax, %edi
	andl %ecx, %edi
	leal 3250441966(%ebx,%ebp,1),%ebx
	movl 16(%esi), %ebp
	xorl %eax, %edi
	addl %edi, %ebx
	movl %ecx, %edi
	roll $22, %ebx
	addl %ecx, %ebx
	/* R0 4 */
	xorl %edx, %edi
	andl %ebx, %edi
	leal 4118548399(%eax,%ebp,1),%eax
	movl 20(%esi), %ebp
	xorl %edx, %edi
	addl %edi, %eax
	movl %ebx, %edi
	roll $7, %eax
	addl %ebx, %eax
	/* R0 5 */
	xorl %ecx, %edi
	andl %eax, %edi
	leal 1200080426(%edx,%ebp,1),%edx
	movl 24(%esi), %ebp
	xorl %ecx, %edi
	addl %edi, %edx
	movl %eax, %edi
	roll $12, %edx
	addl %eax, %edx
	/* R0 6 */
	xorl %ebx, %edi
	andl %edx, %edi
	leal 2821735955(%ecx,%ebp,1),%ecx
	movl 28(%esi), %ebp
	xorl %ebx, %edi
	addl %edi, %ecx
	movl %edx, %edi
	roll $17, %ecx
	addl %edx, %ecx
	/* R0 7 */
	xorl %eax, %edi
	andl %ecx, %edi
	leal 4249261313(%ebx,%ebp,1),%ebx
	movl 32(%esi), %ebp
	xorl %eax, %edi
	addl %edi, %ebx
	movl %ecx, %edi
	roll $22, %ebx
	addl %ecx, %ebx
	/* R0 8 */
	xorl %edx, %edi
	andl %ebx, %edi
	leal 1770035416(%eax,%ebp,1),%eax
	movl 36(%esi), %ebp
	xorl %edx, %edi
	addl %edi, %eax
	movl %ebx, %edi
	roll $7, %eax
	addl %ebx, %eax
	/* R0 9 */
	xorl %ecx, %edi
	andl %eax, %edi
	leal 2336552879(%edx,%ebp,1),%edx
	movl 40(%esi), %ebp
	xorl %ecx, %edi
	addl %edi, %edx
	movl %eax, %edi
	roll $12, %edx
	addl %eax, %edx
	/* R0 10 */
	xorl %ebx, %edi
	andl %edx, %edi
	leal 4294925233(%ecx,%ebp,1),%ecx
	movl 44(%esi), %ebp
	xorl %ebx, %edi
	addl %edi, %ecx
	movl %edx, %edi
	roll $17, %ecx
	addl %edx, %ecx
	/* R0 11 */
	xorl %eax, %edi
	andl %ecx, %edi
	leal 2304563134(%ebx,%ebp,1),%ebx
	movl 48(%esi), %ebp
	xorl %eax, %edi
	addl %edi, %ebx
	movl %ecx, %edi
	roll $22, %ebx
	addl %ecx, %ebx
	/* R0 12 */
	xorl %edx, %edi
	andl %ebx, %edi
	leal 1804603682(%eax,%ebp,1),%eax
	movl 52(%esi), %ebp
	xorl %edx, %edi
	addl %edi, %eax
	movl %ebx, %edi
	roll $7, %eax
	addl %ebx, %eax
	/* R0 13 */
	xorl %ecx, %edi
	andl %eax, %edi
	leal 4254626195(%edx,%ebp,1),%edx
	movl 56(%esi), %ebp
	xorl %ecx, %edi
	addl %edi, %edx
	movl %eax, %edi
	roll $12, %edx
	addl %eax, %edx
	/* R0 14 */
	xorl %ebx, %edi
	andl %edx, %edi
	leal 2792965006(%ecx,%ebp,1),%ecx
	movl 60(%esi), %ebp
	xorl %ebx, %edi
	addl %edi, %ecx
	movl %edx, %edi
	roll $17, %ecx
	addl %edx, %ecx
	/* R0 15 */
	xorl %eax, %edi
	andl %ecx, %edi
	leal 1236535329(%ebx,%ebp,1),%ebx
	/* Pre-load the message word used by the first R1 step. */
	movl 4(%esi), %ebp
	xorl %eax, %edi
	addl %edi, %ebx
	movl %ecx, %edi
	roll $22, %ebx
	addl %ecx, %ebx
	/* R1 section */
	/* Each step computes ((b ^ c) & d) ^ c in %edi; %edi enters the */
	/* step already holding c. */
	/* R1 16 */
	leal 4129170786(%eax,%ebp,1),%eax
	xorl %ebx, %edi
	andl %edx, %edi
	movl 24(%esi), %ebp
	xorl %ecx, %edi
	addl %edi, %eax
	movl %ebx, %edi
	roll $5, %eax
	addl %ebx, %eax
	/* R1 17 */
	leal 3225465664(%edx,%ebp,1),%edx
	xorl %eax, %edi
	andl %ecx, %edi
	movl 44(%esi), %ebp
	xorl %ebx, %edi
	addl %edi, %edx
	movl %eax, %edi
	roll $9, %edx
	addl %eax, %edx
	/* R1 18 */
	leal 643717713(%ecx,%ebp,1),%ecx
	xorl %edx, %edi
	andl %ebx, %edi
	movl (%esi), %ebp
	xorl %eax, %edi
	addl %edi, %ecx
	movl %edx, %edi
	roll $14, %ecx
	addl %edx, %ecx
	/* R1 19 */
	leal 3921069994(%ebx,%ebp,1),%ebx
	xorl %ecx, %edi
	andl %eax, %edi
	movl 20(%esi), %ebp
	xorl %edx, %edi
	addl %edi, %ebx
	movl %ecx, %edi
	roll $20, %ebx
	addl %ecx, %ebx
	/* R1 20 */
	leal 3593408605(%eax,%ebp,1),%eax
	xorl %ebx, %edi
	andl %edx, %edi
	movl 40(%esi), %ebp
	xorl %ecx, %edi
	addl %edi, %eax
	movl %ebx, %edi
	roll $5, %eax
	addl %ebx, %eax
	/* R1 21 */
	leal 38016083(%edx,%ebp,1),%edx
	xorl %eax, %edi
	andl %ecx, %edi
	movl 60(%esi), %ebp
	xorl %ebx, %edi
	addl %edi, %edx
	movl %eax, %edi
	roll $9, %edx
	addl %eax, %edx
	/* R1 22 */
	leal 3634488961(%ecx,%ebp,1),%ecx
	xorl %edx, %edi
	andl %ebx, %edi
	movl 16(%esi), %ebp
	xorl %eax, %edi
	addl %edi, %ecx
	movl %edx, %edi
	roll $14, %ecx
	addl %edx, %ecx
	/* R1 23 */
	leal 3889429448(%ebx,%ebp,1),%ebx
	xorl %ecx, %edi
	andl %eax, %edi
	movl 36(%esi), %ebp
	xorl %edx, %edi
	addl %edi, %ebx
	movl %ecx, %edi
	roll $20, %ebx
	addl %ecx, %ebx
	/* R1 24 */
	leal 568446438(%eax,%ebp,1),%eax
	xorl %ebx, %edi
	andl %edx, %edi
	movl 56(%esi), %ebp
	xorl %ecx, %edi
	addl %edi, %eax
	movl %ebx, %edi
	roll $5, %eax
	addl %ebx, %eax
	/* R1 25 */
	leal 3275163606(%edx,%ebp,1),%edx
	xorl %eax, %edi
	andl %ecx, %edi
	movl 12(%esi), %ebp
	xorl %ebx, %edi
	addl %edi, %edx
	movl %eax, %edi
	roll $9, %edx
	addl %eax, %edx
	/* R1 26 */
	leal 4107603335(%ecx,%ebp,1),%ecx
	xorl %edx, %edi
	andl %ebx, %edi
	movl 32(%esi), %ebp
	xorl %eax, %edi
	addl %edi, %ecx
	movl %edx, %edi
	roll $14, %ecx
	addl %edx, %ecx
	/* R1 27 */
	leal 1163531501(%ebx,%ebp,1),%ebx
	xorl %ecx, %edi
	andl %eax, %edi
	movl 52(%esi), %ebp
	xorl %edx, %edi
	addl %edi, %ebx
	movl %ecx, %edi
	roll $20, %ebx
	addl %ecx, %ebx
	/* R1 28 */
	leal 2850285829(%eax,%ebp,1),%eax
	xorl %ebx, %edi
	andl %edx, %edi
	movl 8(%esi), %ebp
	xorl %ecx, %edi
	addl %edi, %eax
	movl %ebx, %edi
	roll $5, %eax
	addl %ebx, %eax
	/* R1 29 */
	leal 4243563512(%edx,%ebp,1),%edx
	xorl %eax, %edi
	andl %ecx, %edi
	movl 28(%esi), %ebp
	xorl %ebx, %edi
	addl %edi, %edx
	movl %eax, %edi
	roll $9, %edx
	addl %eax, %edx
	/* R1 30 */
	leal 1735328473(%ecx,%ebp,1),%ecx
	xorl %edx, %edi
	andl %ebx, %edi
	movl 48(%esi), %ebp
	xorl %eax, %edi
	addl %edi, %ecx
	movl %edx, %edi
	roll $14, %ecx
	addl %edx, %ecx
	/* R1 31 */
	leal 2368359562(%ebx,%ebp,1),%ebx
	xorl %ecx, %edi
	andl %eax, %edi
	/* Pre-load the message word used by the first R2 step. */
	movl 20(%esi), %ebp
	xorl %edx, %edi
	addl %edi, %ebx
	movl %ecx, %edi
	roll $20, %ebx
	addl %ecx, %ebx
	/* R2 section */
	/* Each step computes b ^ c ^ d in %edi.  Steps come in pairs: the */
	/* final "add b" of an even step is issued inside the following odd */
	/* step, right after that step's leal. */
	/* R2 32 */
	xorl %edx, %edi
	xorl %ebx, %edi
	leal 4294588738(%eax,%ebp,1),%eax
	addl %edi, %eax
	movl 32(%esi), %ebp
	roll $4, %eax
	movl %ebx, %edi
	/* R2 33 */
	leal 2272392833(%edx,%ebp,1),%edx
	addl %ebx, %eax
	xorl %ecx, %edi
	xorl %eax, %edi
	movl 44(%esi), %ebp
	addl %edi, %edx
	movl %eax, %edi
	roll $11, %edx
	addl %eax, %edx
	/* R2 34 */
	xorl %ebx, %edi
	xorl %edx, %edi
	leal 1839030562(%ecx,%ebp,1),%ecx
	addl %edi, %ecx
	movl 56(%esi), %ebp
	roll $16, %ecx
	movl %edx, %edi
	/* R2 35 */
	leal 4259657740(%ebx,%ebp,1),%ebx
	addl %edx, %ecx
	xorl %eax, %edi
	xorl %ecx, %edi
	movl 4(%esi), %ebp
	addl %edi, %ebx
	movl %ecx, %edi
	roll $23, %ebx
	addl %ecx, %ebx
	/* R2 36 */
	xorl %edx, %edi
	xorl %ebx, %edi
	leal 2763975236(%eax,%ebp,1),%eax
	addl %edi, %eax
	movl 16(%esi), %ebp
	roll $4, %eax
	movl %ebx, %edi
	/* R2 37 */
	leal 1272893353(%edx,%ebp,1),%edx
	addl %ebx, %eax
	xorl %ecx, %edi
	xorl %eax, %edi
	movl 28(%esi), %ebp
	addl %edi, %edx
	movl %eax, %edi
	roll $11, %edx
	addl %eax, %edx
	/* R2 38 */
	xorl %ebx, %edi
	xorl %edx, %edi
	leal 4139469664(%ecx,%ebp,1),%ecx
	addl %edi, %ecx
	movl 40(%esi), %ebp
	roll $16, %ecx
	movl %edx, %edi
	/* R2 39 */
	leal 3200236656(%ebx,%ebp,1),%ebx
	addl %edx, %ecx
	xorl %eax, %edi
	xorl %ecx, %edi
	movl 52(%esi), %ebp
	addl %edi, %ebx
	movl %ecx, %edi
	roll $23, %ebx
	addl %ecx, %ebx
	/* R2 40 */
	xorl %edx, %edi
	xorl %ebx, %edi
	leal 681279174(%eax,%ebp,1),%eax
	addl %edi, %eax
	movl (%esi), %ebp
	roll $4, %eax
	movl %ebx, %edi
	/* R2 41 */
	leal 3936430074(%edx,%ebp,1),%edx
	addl %ebx, %eax
	xorl %ecx, %edi
	xorl %eax, %edi
	movl 12(%esi), %ebp
	addl %edi, %edx
	movl %eax, %edi
	roll $11, %edx
	addl %eax, %edx
	/* R2 42 */
	xorl %ebx, %edi
	xorl %edx, %edi
	leal 3572445317(%ecx,%ebp,1),%ecx
	addl %edi, %ecx
	movl 24(%esi), %ebp
	roll $16, %ecx
	movl %edx, %edi
	/* R2 43 */
	leal 76029189(%ebx,%ebp,1),%ebx
	addl %edx, %ecx
	xorl %eax, %edi
	xorl %ecx, %edi
	movl 36(%esi), %ebp
	addl %edi, %ebx
	movl %ecx, %edi
	roll $23, %ebx
	addl %ecx, %ebx
	/* R2 44 */
	xorl %edx, %edi
	xorl %ebx, %edi
	leal 3654602809(%eax,%ebp,1),%eax
	addl %edi, %eax
	movl 48(%esi), %ebp
	roll $4, %eax
	movl %ebx, %edi
	/* R2 45 */
	leal 3873151461(%edx,%ebp,1),%edx
	addl %ebx, %eax
	xorl %ecx, %edi
	xorl %eax, %edi
	movl 60(%esi), %ebp
	addl %edi, %edx
	movl %eax, %edi
	roll $11, %edx
	addl %eax, %edx
	/* R2 46 */
	xorl %ebx, %edi
	xorl %edx, %edi
	leal 530742520(%ecx,%ebp,1),%ecx
	addl %edi, %ecx
	movl 8(%esi), %ebp
	roll $16, %ecx
	movl %edx, %edi
	/* R2 47 */
	leal 3299628645(%ebx,%ebp,1),%ebx
	addl %edx, %ecx
	xorl %eax, %edi
	xorl %ecx, %edi
	/* Pre-load the message word used by the first R3 step. */
	movl (%esi), %ebp
	addl %edi, %ebx
	/* All-ones, so the xor that opens R3 yields ~d. */
	movl $-1, %edi
	roll $23, %ebx
	addl %ecx, %ebx
	/* R3 section */
	/* Each step computes c ^ (b | ~d) in %edi.  The complement for the */
	/* next step is prepared at the end of the current one by loading */
	/* -1 and xor-ing in the register that will act as d. */
	/* R3 48 */
	xorl %edx, %edi
	orl %ebx, %edi
	leal 4096336452(%eax,%ebp,1),%eax
	xorl %ecx, %edi
	movl 28(%esi), %ebp
	addl %edi, %eax
	movl $-1, %edi
	roll $6, %eax
	xorl %ecx, %edi
	addl %ebx, %eax
	/* R3 49 */
	orl %eax, %edi
	leal 1126891415(%edx,%ebp,1),%edx
	xorl %ebx, %edi
	movl 56(%esi), %ebp
	addl %edi, %edx
	movl $-1, %edi
	roll $10, %edx
	xorl %ebx, %edi
	addl %eax, %edx
	/* R3 50 */
	orl %edx, %edi
	leal 2878612391(%ecx,%ebp,1),%ecx
	xorl %eax, %edi
	movl 20(%esi), %ebp
	addl %edi, %ecx
	movl $-1, %edi
	roll $15, %ecx
	xorl %eax, %edi
	addl %edx, %ecx
	/* R3 51 */
	orl %ecx, %edi
	leal 4237533241(%ebx,%ebp,1),%ebx
	xorl %edx, %edi
	movl 48(%esi), %ebp
	addl %edi, %ebx
	movl $-1, %edi
	roll $21, %ebx
	xorl %edx, %edi
	addl %ecx, %ebx
	/* R3 52 */
	orl %ebx, %edi
	leal 1700485571(%eax,%ebp,1),%eax
	xorl %ecx, %edi
	movl 12(%esi), %ebp
	addl %edi, %eax
	movl $-1, %edi
	roll $6, %eax
	xorl %ecx, %edi
	addl %ebx, %eax
	/* R3 53 */
	orl %eax, %edi
	leal 2399980690(%edx,%ebp,1),%edx
	xorl %ebx, %edi
	movl 40(%esi), %ebp
	addl %edi, %edx
	movl $-1, %edi
	roll $10, %edx
	xorl %ebx, %edi
	addl %eax, %edx
	/* R3 54 */
	orl %edx, %edi
	leal 4293915773(%ecx,%ebp,1),%ecx
	xorl %eax, %edi
	movl 4(%esi), %ebp
	addl %edi, %ecx
	movl $-1, %edi
	roll $15, %ecx
	xorl %eax, %edi
	addl %edx, %ecx
	/* R3 55 */
	orl %ecx, %edi
	leal 2240044497(%ebx,%ebp,1),%ebx
	xorl %edx, %edi
	movl 32(%esi), %ebp
	addl %edi, %ebx
	movl $-1, %edi
	roll $21, %ebx
	xorl %edx, %edi
	addl %ecx, %ebx
	/* R3 56 */
	orl %ebx, %edi
	leal 1873313359(%eax,%ebp,1),%eax
	xorl %ecx, %edi
	movl 60(%esi), %ebp
	addl %edi, %eax
	movl $-1, %edi
	roll $6, %eax
	xorl %ecx, %edi
	addl %ebx, %eax
	/* R3 57 */
	orl %eax, %edi
	leal 4264355552(%edx,%ebp,1),%edx
	xorl %ebx, %edi
	movl 24(%esi), %ebp
	addl %edi, %edx
	movl $-1, %edi
	roll $10, %edx
	xorl %ebx, %edi
	addl %eax, %edx
	/* R3 58 */
	orl %edx, %edi
	leal 2734768916(%ecx,%ebp,1),%ecx
	xorl %eax, %edi
	movl 52(%esi), %ebp
	addl %edi, %ecx
	movl $-1, %edi
	roll $15, %ecx
	xorl %eax, %edi
	addl %edx, %ecx
	/* R3 59 */
	orl %ecx, %edi
	leal 1309151649(%ebx,%ebp,1),%ebx
	xorl %edx, %edi
	movl 16(%esi), %ebp
	addl %edi, %ebx
	movl $-1, %edi
	roll $21, %ebx
	xorl %edx, %edi
	addl %ecx, %ebx
	/* R3 60 */
	orl %ebx, %edi
	leal 4149444226(%eax,%ebp,1),%eax
	xorl %ecx, %edi
	movl 44(%esi), %ebp
	addl %edi, %eax
	movl $-1, %edi
	roll $6, %eax
	xorl %ecx, %edi
	addl %ebx, %eax
	/* R3 61 */
	orl %eax, %edi
	leal 3174756917(%edx,%ebp,1),%edx
	xorl %ebx, %edi
	movl 8(%esi), %ebp
	addl %edi, %edx
	movl $-1, %edi
	roll $10, %edx
	xorl %ebx, %edi
	addl %eax, %edx
	/* R3 62 */
	orl %edx, %edi
	leal 718787259(%ecx,%ebp,1),%ecx
	xorl %eax, %edi
	movl 36(%esi), %ebp
	addl %edi, %ecx
	movl $-1, %edi
	roll $15, %ecx
	xorl %eax, %edi
	addl %edx, %ecx
	/* R3 63 */
	orl %ecx, %edi
	leal 3951481745(%ebx,%ebp,1),%ebx
	xorl %edx, %edi
	/* Five words are on the stack now, so 24(%esp) is the state pointer. */
	movl 24(%esp), %ebp
	addl %edi, %ebx
	/* Advance to the next 64-byte block. */
	addl $64, %esi
	roll $21, %ebx
	/* Add the previous state words into A, B, C, D and store them back. */
	movl (%ebp), %edi
	addl %ecx, %ebx
	addl %edi, %eax
	movl 4(%ebp), %edi
	addl %edi, %ebx
	movl 8(%ebp), %edi
	addl %edi, %ecx
	movl 12(%ebp), %edi
	addl %edi, %edx
	movl %eax, (%ebp)
	movl %ebx, 4(%ebp)
	/* Reload the saved address of the last block. */
	movl (%esp), %edi
	movl %ecx, 8(%ebp)
	movl %edx, 12(%ebp)
	/* Loop while last_block >= next_block.  Both operands are addresses, */
	/* so the comparison must be unsigned: a signed test gives the wrong */
	/* answer whenever the two values lie on opposite sides of 0x80000000 */
	/* (early exit, or running past the end of the buffer). */
	cmpl %esi, %edi
	jae .L000start
	/* Drop the saved loop limit, then restore the saved registers. */
	popl %eax
	popl %ebx
	popl %ebp
	popl %edi
	popl %esi
	ret
.md5_block_x86_end:
	SIZE(md5_block_x86,.md5_block_x86_end-md5_block_x86)
.ident "desasm.pl"