x86_64 assembler pack: comply with the updated x86_64-xlate.pl styling rules.

This commit is contained in:
Andy Polyakov 2008-11-12 08:15:52 +00:00
parent 8525377265
commit aa8f38e49b
6 changed files with 111 additions and 136 deletions

View File

@ -617,8 +617,7 @@ AES_encrypt:
push $key push $key
# pick Te4 copy which can't "overlap" with stack frame or key schedule # pick Te4 copy which can't "overlap" with stack frame or key schedule
.picmeup $sbox lea .LAES_Te+2048(%rip),$sbox
lea AES_Te+2048-.($sbox),$sbox
lea 768(%rsp),%rbp lea 768(%rsp),%rbp
sub $sbox,%rbp sub $sbox,%rbp
and \$0x300,%rbp and \$0x300,%rbp
@ -1210,8 +1209,7 @@ AES_decrypt:
push $key push $key
# pick Td4 copy which can't "overlap" with stack frame or key schedule # pick Td4 copy which can't "overlap" with stack frame or key schedule
.picmeup $sbox lea .LAES_Td+2048(%rip),$sbox
lea AES_Td+2048-.($sbox),$sbox
lea 768(%rsp),%rbp lea 768(%rsp),%rbp
sub $sbox,%rbp sub $sbox,%rbp
and \$0x300,%rbp and \$0x300,%rbp
@ -1292,8 +1290,7 @@ _x86_64_AES_set_encrypt_key:
test \$-1,%rdi test \$-1,%rdi
jz .Lbadpointer jz .Lbadpointer
.picmeup %rbp lea .LAES_Te(%rip),%rbp
lea AES_Te-.(%rbp),%rbp
lea 2048+128(%rbp),%rbp lea 2048+128(%rbp),%rbp
# prefetch Te4 # prefetch Te4
@ -1564,8 +1561,7 @@ AES_set_decrypt_key:
cmp %rsi,%rdi cmp %rsi,%rdi
jne .Linvert jne .Linvert
.picmeup %rax lea .LAES_Te+2048+1024(%rip),%rax # rcon
lea AES_Te+2048+1024-.(%rax),%rax # rcon
mov 40(%rax),$mask80 mov 40(%rax),$mask80
mov 48(%rax),$maskfe mov 48(%rax),$maskfe
@ -1636,11 +1632,10 @@ AES_cbc_encrypt:
cld cld
mov %r9d,%r9d # clear upper half of enc mov %r9d,%r9d # clear upper half of enc
.picmeup $sbox lea .LAES_Te(%rip),$sbox
lea AES_Te-.($sbox),$sbox
cmp \$0,%r9 cmp \$0,%r9
jne .Lcbc_picked_te jne .Lcbc_picked_te
lea AES_Td-AES_Te($sbox),$sbox lea .LAES_Td(%rip),$sbox
.Lcbc_picked_te: .Lcbc_picked_te:
mov OPENSSL_ia32cap_P(%rip),%eax mov OPENSSL_ia32cap_P(%rip),%eax
@ -2066,9 +2061,8 @@ ___
} }
$code.=<<___; $code.=<<___;
.globl AES_Te
.align 64 .align 64
AES_Te: .LAES_Te:
___ ___
&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
@ -2275,9 +2269,8 @@ $code.=<<___;
.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b .long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
___ ___
$code.=<<___; $code.=<<___;
.globl AES_Td
.align 64 .align 64
AES_Td: .LAES_Td:
___ ___
&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);

View File

@ -182,7 +182,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
asm ( asm (
" subq %2,%2 \n" " subq %2,%2 \n"
".align 16 \n" ".p2align 4 \n"
"1: movq (%4,%2,8),%0 \n" "1: movq (%4,%2,8),%0 \n"
" adcq (%5,%2,8),%0 \n" " adcq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n" " movq %0,(%3,%2,8) \n"
@ -205,7 +205,7 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
asm ( asm (
" subq %2,%2 \n" " subq %2,%2 \n"
".align 16 \n" ".p2align 4 \n"
"1: movq (%4,%2,8),%0 \n" "1: movq (%4,%2,8),%0 \n"
" sbbq (%5,%2,8),%0 \n" " sbbq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n" " movq %0,(%3,%2,8) \n"

View File

@ -336,8 +336,7 @@ RC4_set_key:
.type RC4_options,\@function,0 .type RC4_options,\@function,0
.align 16 .align 16
RC4_options: RC4_options:
.picmeup %rax lea .Lopts(%rip),%rax
lea .Lopts-.(%rax),%rax
mov OPENSSL_ia32cap_P(%rip),%edx mov OPENSSL_ia32cap_P(%rip),%edx
bt \$20,%edx bt \$20,%edx
jnc .Ldone jnc .Ldone

View File

@ -40,14 +40,16 @@
# sha256_block:-( This is presumably because 64-bit shifts/rotates # sha256_block:-( This is presumably because 64-bit shifts/rotates
# apparently are not atomic instructions, but implemented in microcode. # apparently are not atomic instructions, but implemented in microcode.
$flavour = shift;
$output = shift; $output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl"; die "can't locate x86_64-xlate.pl";
open STDOUT,"| $^X $xlate $output"; open STDOUT,"| $^X $xlate $flavour $output";
if ($output =~ /512/) { if ($output =~ /512/) {
$func="sha512_block_data_order"; $func="sha512_block_data_order";
@ -196,8 +198,7 @@ $func:
mov %rdx,$_end # save end pointer, "3rd" arg mov %rdx,$_end # save end pointer, "3rd" arg
mov %rbp,$_rsp # save copy of %rsp mov %rbp,$_rsp # save copy of %rsp
.picmeup $Tbl lea $TABLE(%rip),$Tbl
lea $TABLE-.($Tbl),$Tbl
mov $SZ*0($ctx),$A mov $SZ*0($ctx),$A
mov $SZ*1($ctx),$B mov $SZ*1($ctx),$B

View File

@ -71,8 +71,7 @@ $func:
mov %rdx,16(%rbx) mov %rdx,16(%rbx)
mov %rax,32(%rbx) # saved stack pointer mov %rax,32(%rbx) # saved stack pointer
.picmeup %rbp lea $table(%rip),%rbp
lea $table-.(%rbp),%rbp
xor %rcx,%rcx xor %rcx,%rcx
xor %rdx,%rdx xor %rdx,%rdx

View File

@ -1,110 +1,37 @@
#!/usr/bin/env perl #!/usr/bin/env perl
$flavour = shift;
$output = shift; $output = shift;
$masm=1 if ($output =~ /\.asm/); if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
open STDOUT,">$output" || die "can't open $output: $!";
print<<___ if(defined($masm)); $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
_TEXT SEGMENT
PUBLIC OPENSSL_rdtsc
PUBLIC OPENSSL_atomic_add $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
ALIGN 16 open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
OPENSSL_atomic_add PROC
mov eax,DWORD PTR[rcx]
\$Lspin: lea r8,DWORD PTR[rdx+rax]
lock cmpxchg DWORD PTR[rcx],r8d
jne \$Lspin
mov eax,r8d
cdqe
ret
OPENSSL_atomic_add ENDP
PUBLIC OPENSSL_wipe_cpu
ALIGN 16
OPENSSL_wipe_cpu PROC
pxor xmm0,xmm0
pxor xmm1,xmm1
pxor xmm2,xmm2
pxor xmm3,xmm3
pxor xmm4,xmm4
pxor xmm5,xmm5
xor rcx,rcx
xor rdx,rdx
xor r8,r8
xor r9,r9
xor r10,r10
xor r11,r11
lea rax,QWORD PTR[rsp+8]
ret
OPENSSL_wipe_cpu ENDP
_TEXT ENDS
CRT\$XIU SEGMENT
EXTRN OPENSSL_cpuid_setup:PROC
DQ OPENSSL_cpuid_setup
CRT\$XIU ENDS
___
print<<___ if(!defined($masm));
.text
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,\@function
.align 16
OPENSSL_atomic_add:
movl (%rdi),%eax
.Lspin: leaq (%rsi,%rax),%r8
lock; cmpxchgl %r8d,(%rdi)
jne .Lspin
movl %r8d,%eax
.byte 0x48,0x98
ret
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,\@function
.align 16
OPENSSL_wipe_cpu:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
pxor %xmm10,%xmm10
pxor %xmm11,%xmm11
pxor %xmm12,%xmm12
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
xorq %rcx,%rcx
xorq %rdx,%rdx
xorq %rsi,%rsi
xorq %rdi,%rdi
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
leaq 8(%rsp),%rax
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
# Pick the registers that carry the first two integer arguments for the
# hand-written routines emitted by this script: %rcx/%rdx under the Win64
# calling convention, %rdi/%rsi otherwise (System V AMD64).
if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
else { $arg1="%rdi"; $arg2="%rsi"; }
print<<___;
.extern OPENSSL_cpuid_setup
.section .init .section .init
call OPENSSL_cpuid_setup call OPENSSL_cpuid_setup
___
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $output";
print<<___;
.text .text
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,\@abi-omnipotent
.align 16
OPENSSL_atomic_add:
movl ($arg1),%eax
.Lspin: leaq ($arg2,%rax),%r8
.byte 0xf0 # lock
cmpxchgl %r8d,($arg1)
jne .Lspin
movl %r8d,%eax
.byte 0x48,0x98 # cltq/cdqe
ret
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
.globl OPENSSL_rdtsc .globl OPENSSL_rdtsc
.type OPENSSL_rdtsc,\@abi-omnipotent .type OPENSSL_rdtsc,\@abi-omnipotent
.align 16 .align 16
@ -159,35 +86,91 @@ OPENSSL_ia32_cpuid:
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
.globl OPENSSL_cleanse .globl OPENSSL_cleanse
.type OPENSSL_cleanse,\@function,2 .type OPENSSL_cleanse,\@abi-omnipotent
.align 16 .align 16
OPENSSL_cleanse: OPENSSL_cleanse:
xor %rax,%rax xor %rax,%rax
cmp \$15,%rsi cmp \$15,$arg2
jae .Lot jae .Lot
.Little: .Little:
mov %al,(%rdi) mov %al,($arg1)
sub \$1,%rsi sub \$1,$arg2
lea 1(%rdi),%rdi lea 1($arg1),$arg1
jnz .Little jnz .Little
ret ret
.align 16 .align 16
.Lot: .Lot:
test \$7,%rdi test \$7,$arg1
jz .Laligned jz .Laligned
mov %al,(%rdi) mov %al,($arg1)
lea -1(%rsi),%rsi lea -1($arg2),$arg2
lea 1(%rdi),%rdi lea 1($arg1),$arg1
jmp .Lot jmp .Lot
.Laligned: .Laligned:
mov %rax,(%rdi) mov %rax,($arg1)
lea -8(%rsi),%rsi lea -8($arg2),$arg2
test \$-8,%rsi test \$-8,$arg2
lea 8(%rdi),%rdi lea 8($arg1),$arg1
jnz .Laligned jnz .Laligned
cmp \$0,%rsi cmp \$0,$arg2
jne .Little jne .Little
ret ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse .size OPENSSL_cleanse,.-OPENSSL_cleanse
___ ___
# Non-Win64 flavour of OPENSSL_wipe_cpu.
# The emitted routine zeroes %xmm0-%xmm15 and the integer registers
# %rcx, %rdx, %rsi, %rdi and %r8-%r11, then returns %rsp+8 in %rax.
# NOTE(review): %rsp+8 is presumably the caller's stack pointer as it was
# before the call pushed the return address -- confirm against the C caller.
print<<___ if (!$win64);
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,\@abi-omnipotent
.align 16
OPENSSL_wipe_cpu:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
pxor %xmm10,%xmm10
pxor %xmm11,%xmm11
pxor %xmm12,%xmm12
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
xorq %rcx,%rcx
xorq %rdx,%rdx
xorq %rsi,%rsi
xorq %rdi,%rdi
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
leaq 8(%rsp),%rax
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# Win64 flavour of OPENSSL_wipe_cpu.
# Only %xmm0-%xmm5, %rcx, %rdx and %r8-%r11 are zeroed before %rsp+8 is
# returned in %rax. %xmm6-%xmm15, %rsi and %rdi are not touched here; they
# are callee-saved under the Microsoft x64 calling convention, which is
# presumably why this variant leaves them alone.
print<<___ if ($win64);
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,\@abi-omnipotent
.align 16
OPENSSL_wipe_cpu:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
xorq %rcx,%rcx
xorq %rdx,%rdx
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
leaq 8(%rsp),%rax
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
close STDOUT; # flush close STDOUT; # flush