Update the x86_64 assembler pack to comply with the updated x86_64-xlate.pl styling rules.
This commit is contained in:
parent
8525377265
commit
aa8f38e49b
@ -617,8 +617,7 @@ AES_encrypt:
|
|||||||
push $key
|
push $key
|
||||||
|
|
||||||
# pick Te4 copy which can't "overlap" with stack frame or key schedule
|
# pick Te4 copy which can't "overlap" with stack frame or key schedule
|
||||||
.picmeup $sbox
|
lea .LAES_Te+2048(%rip),$sbox
|
||||||
lea AES_Te+2048-.($sbox),$sbox
|
|
||||||
lea 768(%rsp),%rbp
|
lea 768(%rsp),%rbp
|
||||||
sub $sbox,%rbp
|
sub $sbox,%rbp
|
||||||
and \$0x300,%rbp
|
and \$0x300,%rbp
|
||||||
@ -1210,8 +1209,7 @@ AES_decrypt:
|
|||||||
push $key
|
push $key
|
||||||
|
|
||||||
# pick Td4 copy which can't "overlap" with stack frame or key schedule
|
# pick Td4 copy which can't "overlap" with stack frame or key schedule
|
||||||
.picmeup $sbox
|
lea .LAES_Td+2048(%rip),$sbox
|
||||||
lea AES_Td+2048-.($sbox),$sbox
|
|
||||||
lea 768(%rsp),%rbp
|
lea 768(%rsp),%rbp
|
||||||
sub $sbox,%rbp
|
sub $sbox,%rbp
|
||||||
and \$0x300,%rbp
|
and \$0x300,%rbp
|
||||||
@ -1292,8 +1290,7 @@ _x86_64_AES_set_encrypt_key:
|
|||||||
test \$-1,%rdi
|
test \$-1,%rdi
|
||||||
jz .Lbadpointer
|
jz .Lbadpointer
|
||||||
|
|
||||||
.picmeup %rbp
|
lea .LAES_Te(%rip),%rbp
|
||||||
lea AES_Te-.(%rbp),%rbp
|
|
||||||
lea 2048+128(%rbp),%rbp
|
lea 2048+128(%rbp),%rbp
|
||||||
|
|
||||||
# prefetch Te4
|
# prefetch Te4
|
||||||
@ -1564,8 +1561,7 @@ AES_set_decrypt_key:
|
|||||||
cmp %rsi,%rdi
|
cmp %rsi,%rdi
|
||||||
jne .Linvert
|
jne .Linvert
|
||||||
|
|
||||||
.picmeup %rax
|
lea .LAES_Te+2048+1024(%rip),%rax # rcon
|
||||||
lea AES_Te+2048+1024-.(%rax),%rax # rcon
|
|
||||||
|
|
||||||
mov 40(%rax),$mask80
|
mov 40(%rax),$mask80
|
||||||
mov 48(%rax),$maskfe
|
mov 48(%rax),$maskfe
|
||||||
@ -1636,11 +1632,10 @@ AES_cbc_encrypt:
|
|||||||
cld
|
cld
|
||||||
mov %r9d,%r9d # clear upper half of enc
|
mov %r9d,%r9d # clear upper half of enc
|
||||||
|
|
||||||
.picmeup $sbox
|
lea .LAES_Te(%rip),$sbox
|
||||||
lea AES_Te-.($sbox),$sbox
|
|
||||||
cmp \$0,%r9
|
cmp \$0,%r9
|
||||||
jne .Lcbc_picked_te
|
jne .Lcbc_picked_te
|
||||||
lea AES_Td-AES_Te($sbox),$sbox
|
lea .LAES_Td(%rip),$sbox
|
||||||
.Lcbc_picked_te:
|
.Lcbc_picked_te:
|
||||||
|
|
||||||
mov OPENSSL_ia32cap_P(%rip),%eax
|
mov OPENSSL_ia32cap_P(%rip),%eax
|
||||||
@ -2066,9 +2061,8 @@ ___
|
|||||||
}
|
}
|
||||||
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.globl AES_Te
|
|
||||||
.align 64
|
.align 64
|
||||||
AES_Te:
|
.LAES_Te:
|
||||||
___
|
___
|
||||||
&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
|
&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
|
||||||
&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
|
&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
|
||||||
@ -2275,9 +2269,8 @@ $code.=<<___;
|
|||||||
.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
|
.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.globl AES_Td
|
|
||||||
.align 64
|
.align 64
|
||||||
AES_Td:
|
.LAES_Td:
|
||||||
___
|
___
|
||||||
&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
|
&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
|
||||||
&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
|
&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
|
||||||
|
@ -182,7 +182,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
|
|||||||
|
|
||||||
asm (
|
asm (
|
||||||
" subq %2,%2 \n"
|
" subq %2,%2 \n"
|
||||||
".align 16 \n"
|
".p2align 4 \n"
|
||||||
"1: movq (%4,%2,8),%0 \n"
|
"1: movq (%4,%2,8),%0 \n"
|
||||||
" adcq (%5,%2,8),%0 \n"
|
" adcq (%5,%2,8),%0 \n"
|
||||||
" movq %0,(%3,%2,8) \n"
|
" movq %0,(%3,%2,8) \n"
|
||||||
@ -205,7 +205,7 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
|
|||||||
|
|
||||||
asm (
|
asm (
|
||||||
" subq %2,%2 \n"
|
" subq %2,%2 \n"
|
||||||
".align 16 \n"
|
".p2align 4 \n"
|
||||||
"1: movq (%4,%2,8),%0 \n"
|
"1: movq (%4,%2,8),%0 \n"
|
||||||
" sbbq (%5,%2,8),%0 \n"
|
" sbbq (%5,%2,8),%0 \n"
|
||||||
" movq %0,(%3,%2,8) \n"
|
" movq %0,(%3,%2,8) \n"
|
||||||
|
@ -336,8 +336,7 @@ RC4_set_key:
|
|||||||
.type RC4_options,\@function,0
|
.type RC4_options,\@function,0
|
||||||
.align 16
|
.align 16
|
||||||
RC4_options:
|
RC4_options:
|
||||||
.picmeup %rax
|
lea .Lopts(%rip),%rax
|
||||||
lea .Lopts-.(%rax),%rax
|
|
||||||
mov OPENSSL_ia32cap_P(%rip),%edx
|
mov OPENSSL_ia32cap_P(%rip),%edx
|
||||||
bt \$20,%edx
|
bt \$20,%edx
|
||||||
jnc .Ldone
|
jnc .Ldone
|
||||||
|
@ -40,14 +40,16 @@
|
|||||||
# sha256_block:-( This is presumably because 64-bit shifts/rotates
|
# sha256_block:-( This is presumably because 64-bit shifts/rotates
|
||||||
# apparently are not atomic instructions, but implemented in microcode.
|
# apparently are not atomic instructions, but implemented in microcode.
|
||||||
|
|
||||||
$output=shift;
|
$flavour = shift;
|
||||||
|
$output = shift;
|
||||||
|
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
|
||||||
|
|
||||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||||
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
|
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
|
||||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||||
die "can't locate x86_64-xlate.pl";
|
die "can't locate x86_64-xlate.pl";
|
||||||
|
|
||||||
open STDOUT,"| $^X $xlate $output";
|
open STDOUT,"| $^X $xlate $flavour $output";
|
||||||
|
|
||||||
if ($output =~ /512/) {
|
if ($output =~ /512/) {
|
||||||
$func="sha512_block_data_order";
|
$func="sha512_block_data_order";
|
||||||
@ -196,8 +198,7 @@ $func:
|
|||||||
mov %rdx,$_end # save end pointer, "3rd" arg
|
mov %rdx,$_end # save end pointer, "3rd" arg
|
||||||
mov %rbp,$_rsp # save copy of %rsp
|
mov %rbp,$_rsp # save copy of %rsp
|
||||||
|
|
||||||
.picmeup $Tbl
|
lea $TABLE(%rip),$Tbl
|
||||||
lea $TABLE-.($Tbl),$Tbl
|
|
||||||
|
|
||||||
mov $SZ*0($ctx),$A
|
mov $SZ*0($ctx),$A
|
||||||
mov $SZ*1($ctx),$B
|
mov $SZ*1($ctx),$B
|
||||||
|
@ -71,8 +71,7 @@ $func:
|
|||||||
mov %rdx,16(%rbx)
|
mov %rdx,16(%rbx)
|
||||||
mov %rax,32(%rbx) # saved stack pointer
|
mov %rax,32(%rbx) # saved stack pointer
|
||||||
|
|
||||||
.picmeup %rbp
|
lea $table(%rip),%rbp
|
||||||
lea $table-.(%rbp),%rbp
|
|
||||||
|
|
||||||
xor %rcx,%rcx
|
xor %rcx,%rcx
|
||||||
xor %rdx,%rdx
|
xor %rdx,%rdx
|
||||||
|
@ -1,110 +1,37 @@
|
|||||||
#!/usr/bin/env perl
|
#!/usr/bin/env perl
|
||||||
|
|
||||||
$output=shift;
|
$flavour = shift;
|
||||||
$masm=1 if ($output =~ /\.asm/);
|
$output = shift;
|
||||||
open STDOUT,">$output" || die "can't open $output: $!";
|
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
|
||||||
|
|
||||||
print<<___ if(defined($masm));
|
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
|
||||||
_TEXT SEGMENT
|
|
||||||
PUBLIC OPENSSL_rdtsc
|
|
||||||
|
|
||||||
PUBLIC OPENSSL_atomic_add
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||||
ALIGN 16
|
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
|
||||||
OPENSSL_atomic_add PROC
|
|
||||||
mov eax,DWORD PTR[rcx]
|
|
||||||
\$Lspin: lea r8,DWORD PTR[rdx+rax]
|
|
||||||
lock cmpxchg DWORD PTR[rcx],r8d
|
|
||||||
jne \$Lspin
|
|
||||||
mov eax,r8d
|
|
||||||
cdqe
|
|
||||||
ret
|
|
||||||
OPENSSL_atomic_add ENDP
|
|
||||||
|
|
||||||
PUBLIC OPENSSL_wipe_cpu
|
|
||||||
ALIGN 16
|
|
||||||
OPENSSL_wipe_cpu PROC
|
|
||||||
pxor xmm0,xmm0
|
|
||||||
pxor xmm1,xmm1
|
|
||||||
pxor xmm2,xmm2
|
|
||||||
pxor xmm3,xmm3
|
|
||||||
pxor xmm4,xmm4
|
|
||||||
pxor xmm5,xmm5
|
|
||||||
xor rcx,rcx
|
|
||||||
xor rdx,rdx
|
|
||||||
xor r8,r8
|
|
||||||
xor r9,r9
|
|
||||||
xor r10,r10
|
|
||||||
xor r11,r11
|
|
||||||
lea rax,QWORD PTR[rsp+8]
|
|
||||||
ret
|
|
||||||
OPENSSL_wipe_cpu ENDP
|
|
||||||
_TEXT ENDS
|
|
||||||
|
|
||||||
CRT\$XIU SEGMENT
|
|
||||||
EXTRN OPENSSL_cpuid_setup:PROC
|
|
||||||
DQ OPENSSL_cpuid_setup
|
|
||||||
CRT\$XIU ENDS
|
|
||||||
|
|
||||||
___
|
|
||||||
print<<___ if(!defined($masm));
|
|
||||||
.text
|
|
||||||
|
|
||||||
.globl OPENSSL_atomic_add
|
|
||||||
.type OPENSSL_atomic_add,\@function
|
|
||||||
.align 16
|
|
||||||
OPENSSL_atomic_add:
|
|
||||||
movl (%rdi),%eax
|
|
||||||
.Lspin: leaq (%rsi,%rax),%r8
|
|
||||||
lock; cmpxchgl %r8d,(%rdi)
|
|
||||||
jne .Lspin
|
|
||||||
movl %r8d,%eax
|
|
||||||
.byte 0x48,0x98
|
|
||||||
ret
|
|
||||||
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
|
||||||
|
|
||||||
.globl OPENSSL_wipe_cpu
|
|
||||||
.type OPENSSL_wipe_cpu,\@function
|
|
||||||
.align 16
|
|
||||||
OPENSSL_wipe_cpu:
|
|
||||||
pxor %xmm0,%xmm0
|
|
||||||
pxor %xmm1,%xmm1
|
|
||||||
pxor %xmm2,%xmm2
|
|
||||||
pxor %xmm3,%xmm3
|
|
||||||
pxor %xmm4,%xmm4
|
|
||||||
pxor %xmm5,%xmm5
|
|
||||||
pxor %xmm6,%xmm6
|
|
||||||
pxor %xmm7,%xmm7
|
|
||||||
pxor %xmm8,%xmm8
|
|
||||||
pxor %xmm9,%xmm9
|
|
||||||
pxor %xmm10,%xmm10
|
|
||||||
pxor %xmm11,%xmm11
|
|
||||||
pxor %xmm12,%xmm12
|
|
||||||
pxor %xmm13,%xmm13
|
|
||||||
pxor %xmm14,%xmm14
|
|
||||||
pxor %xmm15,%xmm15
|
|
||||||
xorq %rcx,%rcx
|
|
||||||
xorq %rdx,%rdx
|
|
||||||
xorq %rsi,%rsi
|
|
||||||
xorq %rdi,%rdi
|
|
||||||
xorq %r8,%r8
|
|
||||||
xorq %r9,%r9
|
|
||||||
xorq %r10,%r10
|
|
||||||
xorq %r11,%r11
|
|
||||||
leaq 8(%rsp),%rax
|
|
||||||
ret
|
|
||||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
|
||||||
|
|
||||||
|
if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
|
||||||
|
else { $arg1="%rdi"; $arg2="%rsi"; }
|
||||||
|
print<<___;
|
||||||
|
.extern OPENSSL_cpuid_setup
|
||||||
.section .init
|
.section .init
|
||||||
call OPENSSL_cpuid_setup
|
call OPENSSL_cpuid_setup
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|
||||||
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $output";
|
|
||||||
|
|
||||||
print<<___;
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
|
.globl OPENSSL_atomic_add
|
||||||
|
.type OPENSSL_atomic_add,\@abi-omnipotent
|
||||||
|
.align 16
|
||||||
|
OPENSSL_atomic_add:
|
||||||
|
movl ($arg1),%eax
|
||||||
|
.Lspin: leaq ($arg2,%rax),%r8
|
||||||
|
.byte 0xf0 # lock
|
||||||
|
cmpxchgl %r8d,($arg1)
|
||||||
|
jne .Lspin
|
||||||
|
movl %r8d,%eax
|
||||||
|
.byte 0x48,0x98 # cltq/cdqe
|
||||||
|
ret
|
||||||
|
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
||||||
|
|
||||||
.globl OPENSSL_rdtsc
|
.globl OPENSSL_rdtsc
|
||||||
.type OPENSSL_rdtsc,\@abi-omnipotent
|
.type OPENSSL_rdtsc,\@abi-omnipotent
|
||||||
.align 16
|
.align 16
|
||||||
@ -159,35 +86,91 @@ OPENSSL_ia32_cpuid:
|
|||||||
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
|
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
|
||||||
|
|
||||||
.globl OPENSSL_cleanse
|
.globl OPENSSL_cleanse
|
||||||
.type OPENSSL_cleanse,\@function,2
|
.type OPENSSL_cleanse,\@abi-omnipotent
|
||||||
.align 16
|
.align 16
|
||||||
OPENSSL_cleanse:
|
OPENSSL_cleanse:
|
||||||
xor %rax,%rax
|
xor %rax,%rax
|
||||||
cmp \$15,%rsi
|
cmp \$15,$arg2
|
||||||
jae .Lot
|
jae .Lot
|
||||||
.Little:
|
.Little:
|
||||||
mov %al,(%rdi)
|
mov %al,($arg1)
|
||||||
sub \$1,%rsi
|
sub \$1,$arg2
|
||||||
lea 1(%rdi),%rdi
|
lea 1($arg1),$arg1
|
||||||
jnz .Little
|
jnz .Little
|
||||||
ret
|
ret
|
||||||
.align 16
|
.align 16
|
||||||
.Lot:
|
.Lot:
|
||||||
test \$7,%rdi
|
test \$7,$arg1
|
||||||
jz .Laligned
|
jz .Laligned
|
||||||
mov %al,(%rdi)
|
mov %al,($arg1)
|
||||||
lea -1(%rsi),%rsi
|
lea -1($arg2),$arg2
|
||||||
lea 1(%rdi),%rdi
|
lea 1($arg1),$arg1
|
||||||
jmp .Lot
|
jmp .Lot
|
||||||
.Laligned:
|
.Laligned:
|
||||||
mov %rax,(%rdi)
|
mov %rax,($arg1)
|
||||||
lea -8(%rsi),%rsi
|
lea -8($arg2),$arg2
|
||||||
test \$-8,%rsi
|
test \$-8,$arg2
|
||||||
lea 8(%rdi),%rdi
|
lea 8($arg1),$arg1
|
||||||
jnz .Laligned
|
jnz .Laligned
|
||||||
cmp \$0,%rsi
|
cmp \$0,$arg2
|
||||||
jne .Little
|
jne .Little
|
||||||
ret
|
ret
|
||||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||||
___
|
___
|
||||||
|
|
||||||
|
print<<___ if (!$win64);
|
||||||
|
.globl OPENSSL_wipe_cpu
|
||||||
|
.type OPENSSL_wipe_cpu,\@abi-omnipotent
|
||||||
|
.align 16
|
||||||
|
OPENSSL_wipe_cpu:
|
||||||
|
pxor %xmm0,%xmm0
|
||||||
|
pxor %xmm1,%xmm1
|
||||||
|
pxor %xmm2,%xmm2
|
||||||
|
pxor %xmm3,%xmm3
|
||||||
|
pxor %xmm4,%xmm4
|
||||||
|
pxor %xmm5,%xmm5
|
||||||
|
pxor %xmm6,%xmm6
|
||||||
|
pxor %xmm7,%xmm7
|
||||||
|
pxor %xmm8,%xmm8
|
||||||
|
pxor %xmm9,%xmm9
|
||||||
|
pxor %xmm10,%xmm10
|
||||||
|
pxor %xmm11,%xmm11
|
||||||
|
pxor %xmm12,%xmm12
|
||||||
|
pxor %xmm13,%xmm13
|
||||||
|
pxor %xmm14,%xmm14
|
||||||
|
pxor %xmm15,%xmm15
|
||||||
|
xorq %rcx,%rcx
|
||||||
|
xorq %rdx,%rdx
|
||||||
|
xorq %rsi,%rsi
|
||||||
|
xorq %rdi,%rdi
|
||||||
|
xorq %r8,%r8
|
||||||
|
xorq %r9,%r9
|
||||||
|
xorq %r10,%r10
|
||||||
|
xorq %r11,%r11
|
||||||
|
leaq 8(%rsp),%rax
|
||||||
|
ret
|
||||||
|
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||||
|
___
|
||||||
|
print<<___ if ($win64);
|
||||||
|
.globl OPENSSL_wipe_cpu
|
||||||
|
.type OPENSSL_wipe_cpu,\@abi-omnipotent
|
||||||
|
.align 16
|
||||||
|
OPENSSL_wipe_cpu:
|
||||||
|
pxor %xmm0,%xmm0
|
||||||
|
pxor %xmm1,%xmm1
|
||||||
|
pxor %xmm2,%xmm2
|
||||||
|
pxor %xmm3,%xmm3
|
||||||
|
pxor %xmm4,%xmm4
|
||||||
|
pxor %xmm5,%xmm5
|
||||||
|
xorq %rcx,%rcx
|
||||||
|
xorq %rdx,%rdx
|
||||||
|
xorq %r8,%r8
|
||||||
|
xorq %r9,%r9
|
||||||
|
xorq %r10,%r10
|
||||||
|
xorq %r11,%r11
|
||||||
|
leaq 8(%rsp),%rax
|
||||||
|
ret
|
||||||
|
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||||
|
___
|
||||||
|
|
||||||
close STDOUT; # flush
|
close STDOUT; # flush
|
||||||
|
Loading…
Reference in New Issue
Block a user