Update the x86_64 assembler pack to comply with the updated x86_64-xlate.pl styling rules.
This commit is contained in:
parent
8525377265
commit
aa8f38e49b
@ -617,8 +617,7 @@ AES_encrypt:
|
||||
push $key
|
||||
|
||||
# pick Te4 copy which can't "overlap" with stack frame or key schedule
|
||||
.picmeup $sbox
|
||||
lea AES_Te+2048-.($sbox),$sbox
|
||||
lea .LAES_Te+2048(%rip),$sbox
|
||||
lea 768(%rsp),%rbp
|
||||
sub $sbox,%rbp
|
||||
and \$0x300,%rbp
|
||||
@ -1210,8 +1209,7 @@ AES_decrypt:
|
||||
push $key
|
||||
|
||||
# pick Td4 copy which can't "overlap" with stack frame or key schedule
|
||||
.picmeup $sbox
|
||||
lea AES_Td+2048-.($sbox),$sbox
|
||||
lea .LAES_Td+2048(%rip),$sbox
|
||||
lea 768(%rsp),%rbp
|
||||
sub $sbox,%rbp
|
||||
and \$0x300,%rbp
|
||||
@ -1292,8 +1290,7 @@ _x86_64_AES_set_encrypt_key:
|
||||
test \$-1,%rdi
|
||||
jz .Lbadpointer
|
||||
|
||||
.picmeup %rbp
|
||||
lea AES_Te-.(%rbp),%rbp
|
||||
lea .LAES_Te(%rip),%rbp
|
||||
lea 2048+128(%rbp),%rbp
|
||||
|
||||
# prefetch Te4
|
||||
@ -1564,8 +1561,7 @@ AES_set_decrypt_key:
|
||||
cmp %rsi,%rdi
|
||||
jne .Linvert
|
||||
|
||||
.picmeup %rax
|
||||
lea AES_Te+2048+1024-.(%rax),%rax # rcon
|
||||
lea .LAES_Te+2048+1024(%rip),%rax # rcon
|
||||
|
||||
mov 40(%rax),$mask80
|
||||
mov 48(%rax),$maskfe
|
||||
@ -1636,11 +1632,10 @@ AES_cbc_encrypt:
|
||||
cld
|
||||
mov %r9d,%r9d # clear upper half of enc
|
||||
|
||||
.picmeup $sbox
|
||||
lea AES_Te-.($sbox),$sbox
|
||||
lea .LAES_Te(%rip),$sbox
|
||||
cmp \$0,%r9
|
||||
jne .Lcbc_picked_te
|
||||
lea AES_Td-AES_Te($sbox),$sbox
|
||||
lea .LAES_Td(%rip),$sbox
|
||||
.Lcbc_picked_te:
|
||||
|
||||
mov OPENSSL_ia32cap_P(%rip),%eax
|
||||
@ -2066,9 +2061,8 @@ ___
|
||||
}
|
||||
|
||||
$code.=<<___;
|
||||
.globl AES_Te
|
||||
.align 64
|
||||
AES_Te:
|
||||
.LAES_Te:
|
||||
___
|
||||
&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
|
||||
&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
|
||||
@ -2275,9 +2269,8 @@ $code.=<<___;
|
||||
.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
|
||||
___
|
||||
$code.=<<___;
|
||||
.globl AES_Td
|
||||
.align 64
|
||||
AES_Td:
|
||||
.LAES_Td:
|
||||
___
|
||||
&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
|
||||
&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
|
||||
|
@ -182,7 +182,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
|
||||
|
||||
asm (
|
||||
" subq %2,%2 \n"
|
||||
".align 16 \n"
|
||||
".p2align 4 \n"
|
||||
"1: movq (%4,%2,8),%0 \n"
|
||||
" adcq (%5,%2,8),%0 \n"
|
||||
" movq %0,(%3,%2,8) \n"
|
||||
@ -205,7 +205,7 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
|
||||
|
||||
asm (
|
||||
" subq %2,%2 \n"
|
||||
".align 16 \n"
|
||||
".p2align 4 \n"
|
||||
"1: movq (%4,%2,8),%0 \n"
|
||||
" sbbq (%5,%2,8),%0 \n"
|
||||
" movq %0,(%3,%2,8) \n"
|
||||
|
@ -336,8 +336,7 @@ RC4_set_key:
|
||||
.type RC4_options,\@function,0
|
||||
.align 16
|
||||
RC4_options:
|
||||
.picmeup %rax
|
||||
lea .Lopts-.(%rax),%rax
|
||||
lea .Lopts(%rip),%rax
|
||||
mov OPENSSL_ia32cap_P(%rip),%edx
|
||||
bt \$20,%edx
|
||||
jnc .Ldone
|
||||
|
@ -40,14 +40,16 @@
|
||||
# sha256_block:-( This is presumably because 64-bit shifts/rotates
|
||||
# apparently are not atomic instructions, but implemented in microcode.
|
||||
|
||||
$output=shift;
|
||||
$flavour = shift;
|
||||
$output = shift;
|
||||
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||
die "can't locate x86_64-xlate.pl";
|
||||
|
||||
open STDOUT,"| $^X $xlate $output";
|
||||
open STDOUT,"| $^X $xlate $flavour $output";
|
||||
|
||||
if ($output =~ /512/) {
|
||||
$func="sha512_block_data_order";
|
||||
@ -196,8 +198,7 @@ $func:
|
||||
mov %rdx,$_end # save end pointer, "3rd" arg
|
||||
mov %rbp,$_rsp # save copy of %rsp
|
||||
|
||||
.picmeup $Tbl
|
||||
lea $TABLE-.($Tbl),$Tbl
|
||||
lea $TABLE(%rip),$Tbl
|
||||
|
||||
mov $SZ*0($ctx),$A
|
||||
mov $SZ*1($ctx),$B
|
||||
|
@ -71,8 +71,7 @@ $func:
|
||||
mov %rdx,16(%rbx)
|
||||
mov %rax,32(%rbx) # saved stack pointer
|
||||
|
||||
.picmeup %rbp
|
||||
lea $table-.(%rbp),%rbp
|
||||
lea $table(%rip),%rbp
|
||||
|
||||
xor %rcx,%rcx
|
||||
xor %rdx,%rdx
|
||||
|
@ -1,110 +1,37 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
$output=shift;
|
||||
$masm=1 if ($output =~ /\.asm/);
|
||||
open STDOUT,">$output" || die "can't open $output: $!";
|
||||
$flavour = shift;
|
||||
$output = shift;
|
||||
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
|
||||
|
||||
print<<___ if(defined($masm));
|
||||
_TEXT SEGMENT
|
||||
PUBLIC OPENSSL_rdtsc
|
||||
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
|
||||
|
||||
PUBLIC OPENSSL_atomic_add
|
||||
ALIGN 16
|
||||
OPENSSL_atomic_add PROC
|
||||
mov eax,DWORD PTR[rcx]
|
||||
\$Lspin: lea r8,DWORD PTR[rdx+rax]
|
||||
lock cmpxchg DWORD PTR[rcx],r8d
|
||||
jne \$Lspin
|
||||
mov eax,r8d
|
||||
cdqe
|
||||
ret
|
||||
OPENSSL_atomic_add ENDP
|
||||
|
||||
PUBLIC OPENSSL_wipe_cpu
|
||||
ALIGN 16
|
||||
OPENSSL_wipe_cpu PROC
|
||||
pxor xmm0,xmm0
|
||||
pxor xmm1,xmm1
|
||||
pxor xmm2,xmm2
|
||||
pxor xmm3,xmm3
|
||||
pxor xmm4,xmm4
|
||||
pxor xmm5,xmm5
|
||||
xor rcx,rcx
|
||||
xor rdx,rdx
|
||||
xor r8,r8
|
||||
xor r9,r9
|
||||
xor r10,r10
|
||||
xor r11,r11
|
||||
lea rax,QWORD PTR[rsp+8]
|
||||
ret
|
||||
OPENSSL_wipe_cpu ENDP
|
||||
_TEXT ENDS
|
||||
|
||||
CRT\$XIU SEGMENT
|
||||
EXTRN OPENSSL_cpuid_setup:PROC
|
||||
DQ OPENSSL_cpuid_setup
|
||||
CRT\$XIU ENDS
|
||||
|
||||
___
|
||||
print<<___ if(!defined($masm));
|
||||
.text
|
||||
|
||||
.globl OPENSSL_atomic_add
|
||||
.type OPENSSL_atomic_add,\@function
|
||||
.align 16
|
||||
OPENSSL_atomic_add:
|
||||
movl (%rdi),%eax
|
||||
.Lspin: leaq (%rsi,%rax),%r8
|
||||
lock; cmpxchgl %r8d,(%rdi)
|
||||
jne .Lspin
|
||||
movl %r8d,%eax
|
||||
.byte 0x48,0x98
|
||||
ret
|
||||
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
||||
|
||||
.globl OPENSSL_wipe_cpu
|
||||
.type OPENSSL_wipe_cpu,\@function
|
||||
.align 16
|
||||
OPENSSL_wipe_cpu:
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
pxor %xmm8,%xmm8
|
||||
pxor %xmm9,%xmm9
|
||||
pxor %xmm10,%xmm10
|
||||
pxor %xmm11,%xmm11
|
||||
pxor %xmm12,%xmm12
|
||||
pxor %xmm13,%xmm13
|
||||
pxor %xmm14,%xmm14
|
||||
pxor %xmm15,%xmm15
|
||||
xorq %rcx,%rcx
|
||||
xorq %rdx,%rdx
|
||||
xorq %rsi,%rsi
|
||||
xorq %rdi,%rdi
|
||||
xorq %r8,%r8
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
leaq 8(%rsp),%rax
|
||||
ret
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
|
||||
|
||||
if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
|
||||
else { $arg1="%rdi"; $arg2="%rsi"; }
|
||||
print<<___;
|
||||
.extern OPENSSL_cpuid_setup
|
||||
.section .init
|
||||
call OPENSSL_cpuid_setup
|
||||
|
||||
___
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $output";
|
||||
|
||||
print<<___;
|
||||
.text
|
||||
|
||||
# OPENSSL_atomic_add: atomically adds the second argument to the 32-bit
# value addressed by the first argument and returns the updated value,
# sign-extended to 64 bits, in %rax.
# The argument registers are interpolated by the generating script
# (arg1 = pointer, arg2 = addend).
# Implemented as a compare-and-exchange retry loop: %eax holds the value
# the memory word is expected to contain, %r8d the candidate new value.
.globl OPENSSL_atomic_add
|
||||
.type OPENSSL_atomic_add,\@abi-omnipotent
|
||||
.align 16
|
||||
OPENSSL_atomic_add:
|
||||
movl ($arg1),%eax
|
||||
# r8 = addend + expected old value; recomputed on every retry
.Lspin: leaq ($arg2,%rax),%r8
|
||||
.byte 0xf0 # lock
|
||||
# stores %r8d only if the memory word still equals %eax; otherwise
# %eax is refreshed with the current memory value and ZF is cleared
cmpxchgl %r8d,($arg1)
|
||||
# another writer changed the word in the meantime: retry
jne .Lspin
|
||||
movl %r8d,%eax
|
||||
.byte 0x48,0x98 # cltq/cdqe
|
||||
ret
|
||||
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
||||
|
||||
.globl OPENSSL_rdtsc
|
||||
.type OPENSSL_rdtsc,\@abi-omnipotent
|
||||
.align 16
|
||||
@ -159,35 +86,91 @@ OPENSSL_ia32_cpuid:
|
||||
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
|
||||
|
||||
.globl OPENSSL_cleanse
|
||||
.type OPENSSL_cleanse,\@function,2
|
||||
.type OPENSSL_cleanse,\@abi-omnipotent
|
||||
.align 16
|
||||
OPENSSL_cleanse:
|
||||
xor %rax,%rax
|
||||
cmp \$15,%rsi
|
||||
cmp \$15,$arg2
|
||||
jae .Lot
|
||||
.Little:
|
||||
mov %al,(%rdi)
|
||||
sub \$1,%rsi
|
||||
lea 1(%rdi),%rdi
|
||||
mov %al,($arg1)
|
||||
sub \$1,$arg2
|
||||
lea 1($arg1),$arg1
|
||||
jnz .Little
|
||||
ret
|
||||
.align 16
|
||||
.Lot:
|
||||
test \$7,%rdi
|
||||
test \$7,$arg1
|
||||
jz .Laligned
|
||||
mov %al,(%rdi)
|
||||
lea -1(%rsi),%rsi
|
||||
lea 1(%rdi),%rdi
|
||||
mov %al,($arg1)
|
||||
lea -1($arg2),$arg2
|
||||
lea 1($arg1),$arg1
|
||||
jmp .Lot
|
||||
.Laligned:
|
||||
mov %rax,(%rdi)
|
||||
lea -8(%rsi),%rsi
|
||||
test \$-8,%rsi
|
||||
lea 8(%rdi),%rdi
|
||||
mov %rax,($arg1)
|
||||
lea -8($arg2),$arg2
|
||||
test \$-8,$arg2
|
||||
lea 8($arg1),$arg1
|
||||
jnz .Laligned
|
||||
cmp \$0,%rsi
|
||||
cmp \$0,$arg2
|
||||
jne .Little
|
||||
ret
|
||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||
___
|
||||
|
||||
# Emit the non-Win64 variant of OPENSSL_wipe_cpu.
# The routine zeroes %xmm0-%xmm15 and the integer registers %rcx, %rdx,
# %rsi, %rdi and %r8-%r11, then returns %rsp+8 in %rax. No callee-saved
# register (%rbx, %rbp, %r12-%r15) is touched, so nothing needs to be
# saved or restored.
# NOTE(review): the cleared set matches the caller-saved registers of the
# System V AMD64 ABI; presumably that is the ABI targeted whenever $win64
# is false -- confirm against the supported flavours.
print<<___ if (!$win64);
|
||||
.globl OPENSSL_wipe_cpu
|
||||
.type OPENSSL_wipe_cpu,\@abi-omnipotent
|
||||
.align 16
|
||||
OPENSSL_wipe_cpu:
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
pxor %xmm8,%xmm8
|
||||
pxor %xmm9,%xmm9
|
||||
pxor %xmm10,%xmm10
|
||||
pxor %xmm11,%xmm11
|
||||
pxor %xmm12,%xmm12
|
||||
pxor %xmm13,%xmm13
|
||||
pxor %xmm14,%xmm14
|
||||
pxor %xmm15,%xmm15
|
||||
xorq %rcx,%rcx
|
||||
xorq %rdx,%rdx
|
||||
xorq %rsi,%rsi
|
||||
xorq %rdi,%rdi
|
||||
xorq %r8,%r8
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
leaq 8(%rsp),%rax
|
||||
ret
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
___
|
||||
# Emit the Win64 variant of OPENSSL_wipe_cpu.
# Only %xmm0-%xmm5 and the integer registers %rcx, %rdx and %r8-%r11 are
# zeroed before %rsp+8 is returned in %rax. Unlike the non-Win64 variant,
# %rsi, %rdi and %xmm6-%xmm15 are left untouched: the Microsoft x64
# calling convention treats them as callee-saved, so clearing them would
# clobber values the caller expects to survive the call.
print<<___ if ($win64);
|
||||
.globl OPENSSL_wipe_cpu
|
||||
.type OPENSSL_wipe_cpu,\@abi-omnipotent
|
||||
.align 16
|
||||
OPENSSL_wipe_cpu:
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
xorq %rcx,%rcx
|
||||
xorq %rdx,%rdx
|
||||
xorq %r8,%r8
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
leaq 8(%rsp),%rax
|
||||
ret
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
___
|
||||
|
||||
close STDOUT; # flush
|
||||
|
Loading…
Reference in New Issue
Block a user