x86*cpuid update [from HEAD].
This commit is contained in:
parent
cd5ab329f2
commit
b56cb7c6ea
@ -1,19 +1,12 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
$output=shift;
|
||||
# Flag Win64 assembler output when the target file is named win64a.s or
# win64a.asm.  The extension alternatives need a group: the former
# character class [s|asm] matched any single one of 's', '|', 'a', 'm'.
$win64a=1 if ($output =~ /win64a\.(?:s|asm)/);
|
||||
$masm=1 if ($output =~ /\.asm/);
|
||||
# Redirect STDOUT to the requested output file.  Low-precedence "or" is
# required: with "||" the die clause binds to the (always true) string
# ">$output", so a failed open would never be reported.
open STDOUT,">$output" or die "can't open $output: $!";
|
||||
|
||||
print<<___ if(defined($win64a));
|
||||
print<<___ if(defined($masm));
|
||||
_TEXT SEGMENT
|
||||
PUBLIC OPENSSL_rdtsc
|
||||
ALIGN 16
|
||||
OPENSSL_rdtsc PROC
|
||||
rdtsc
|
||||
shl rdx,32
|
||||
or rax,rdx
|
||||
ret
|
||||
OPENSSL_rdtsc ENDP
|
||||
|
||||
PUBLIC OPENSSL_atomic_add
|
||||
ALIGN 16
|
||||
@ -45,35 +38,16 @@ OPENSSL_wipe_cpu PROC
|
||||
lea rax,QWORD PTR[rsp+8]
|
||||
ret
|
||||
OPENSSL_wipe_cpu ENDP
|
||||
|
||||
OPENSSL_ia32_cpuid PROC
|
||||
mov r8,rbx
|
||||
mov eax,1
|
||||
cpuid
|
||||
shl rcx,32
|
||||
mov eax,edx
|
||||
mov rbx,r8
|
||||
or rax,rcx
|
||||
ret
|
||||
OPENSSL_ia32_cpuid ENDP
|
||||
_TEXT ENDS
|
||||
|
||||
CRT\$XIU SEGMENT
|
||||
EXTRN OPENSSL_cpuid_setup:PROC
|
||||
DQ OPENSSL_cpuid_setup
|
||||
CRT\$XIU ENDS
|
||||
END
|
||||
|
||||
___
|
||||
print<<___ if(!defined($win64a));
|
||||
print<<___ if(!defined($masm));
|
||||
.text
|
||||
.globl OPENSSL_rdtsc
|
||||
.align 16
|
||||
OPENSSL_rdtsc:
|
||||
rdtsc
|
||||
shlq \$32,%rdx
|
||||
orq %rdx,%rax
|
||||
ret
|
||||
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
|
||||
|
||||
.globl OPENSSL_atomic_add
|
||||
.type OPENSSL_atomic_add,\@function
|
||||
@ -120,19 +94,66 @@ OPENSSL_wipe_cpu:
|
||||
ret
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
|
||||
.globl OPENSSL_ia32_cpuid
|
||||
.align 16
|
||||
OPENSSL_ia32_cpuid:
|
||||
movq %rbx,%r8
|
||||
movl \$1,%eax
|
||||
cpuid
|
||||
shlq \$32,%rcx
|
||||
movl %edx,%eax
|
||||
movq %r8,%rbx
|
||||
orq %rcx,%rax
|
||||
ret
|
||||
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
|
||||
|
||||
.section .init
|
||||
call OPENSSL_cpuid_setup
|
||||
|
||||
___
|
||||
|
||||
# Pipe everything printed from here on into perlasm/x86_64-xlate.pl, run
# with the current perl interpreter ($^X) and given $output as its
# argument.  Check the result so that a failure to start the translator
# is reported instead of silently discarding the generated code.
open STDOUT,"| $^X perlasm/x86_64-xlate.pl $output"
    or die "can't call perlasm/x86_64-xlate.pl: $!";
|
||||
print<<___;
|
||||
.text
|
||||
|
||||
.globl OPENSSL_rdtsc
|
||||
.type OPENSSL_rdtsc,\@abi-omnipotent
|
||||
.align 16
|
||||
OPENSSL_rdtsc:
|
||||
rdtsc
|
||||
shl \$32,%rdx
|
||||
or %rdx,%rax
|
||||
ret
|
||||
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
|
||||
|
||||
.globl OPENSSL_ia32_cpuid
|
||||
.type OPENSSL_ia32_cpuid,\@abi-omnipotent
|
||||
.align 16
|
||||
OPENSSL_ia32_cpuid:
|
||||
mov %rbx,%r8
|
||||
|
||||
xor %eax,%eax
|
||||
cpuid
|
||||
xor %eax,%eax
|
||||
cmp \$0x756e6547,%ebx # "Genu"
|
||||
setne %al
|
||||
mov %eax,%r9d
|
||||
cmp \$0x49656e69,%edx # "ineI"
|
||||
setne %al
|
||||
or %eax,%r9d
|
||||
cmp \$0x6c65746e,%ecx # "ntel"
|
||||
setne %al
|
||||
or %eax,%r9d
|
||||
|
||||
mov \$1,%eax
|
||||
cpuid
|
||||
cmp \$0,%r9d
|
||||
jne .Lnotintel
|
||||
or \$1<<20,%edx # use reserved bit to engage RC4_CHAR
|
||||
and \$15,%ah
|
||||
cmp \$15,%ah # examine Family ID
|
||||
je .Lnotintel
|
||||
or \$1<<30,%edx # use reserved bit to skip unrolled loop
|
||||
.Lnotintel:
|
||||
bt \$28,%edx # test hyper-threading bit
|
||||
jnc .Ldone
|
||||
shr \$16,%ebx
|
||||
cmp \$1,%bl # see if cache is shared
|
||||
ja .Ldone
|
||||
and \$0xefffffff,%edx # ~(1<<28)
|
||||
.Ldone:
|
||||
shl \$32,%rcx
|
||||
mov %edx,%eax
|
||||
mov %r8,%rbx
|
||||
or %rcx,%rax
|
||||
ret
|
||||
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
|
||||
___
|
||||
# Closing STDOUT flushes the buffered output.  close returns false when
# the final write fails or, for a piped handle, when the child process
# exits with a non-zero status ($? holds that status), so check it
# rather than leaving a truncated or missing output file unnoticed.
close STDOUT or die "error closing STDOUT: $! (child status $?)";
|
||||
|
@ -19,13 +19,40 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
&pop ("eax");
|
||||
&xor ("ecx","eax");
|
||||
&bt ("ecx",21);
|
||||
&jnc (&label("nocpuid"));
|
||||
&jnc (&label("done"));
|
||||
&xor ("eax","eax");
|
||||
&cpuid ();
|
||||
&xor ("eax","eax");
|
||||
&cmp ("ebx",0x756e6547); # "Genu"
|
||||
&setne (&LB("eax"));
|
||||
&mov ("ebp","eax");
|
||||
&cmp ("edx",0x49656e69); # "ineI"
|
||||
&setne (&LB("eax"));
|
||||
&or ("ebp","eax");
|
||||
&cmp ("ecx",0x6c65746e); # "ntel"
|
||||
&setne (&LB("eax"));
|
||||
&or ("ebp","eax");
|
||||
&mov ("eax",1);
|
||||
&cpuid ();
|
||||
&set_label("nocpuid");
|
||||
&cmp ("ebp",0);
|
||||
&jne (&label("notP4"));
|
||||
&and (&HB("eax"),15); # family ID
|
||||
&cmp (&HB("eax"),15); # P4?
|
||||
&jne (&label("notP4"));
|
||||
&or ("edx",1<<20); # use reserved bit to engage RC4_CHAR
|
||||
&set_label("notP4");
|
||||
&bt ("edx",28); # test hyper-threading bit
|
||||
&jnc (&label("done"));
|
||||
&shr ("ebx",16);
|
||||
&cmp (&LB("ebx"),1); # see if cache is shared(*)
|
||||
&ja (&label("done"));
|
||||
&and ("edx",0xefffffff); # clear hyper-threading bit if not
|
||||
&set_label("done");
|
||||
&mov ("eax","edx");
|
||||
&mov ("edx","ecx");
|
||||
&function_end("OPENSSL_ia32_cpuid");
|
||||
# (*) on Core2 this value is set to 2 denoting the fact that L2
|
||||
# cache is shared between cores.
|
||||
|
||||
&external_label("OPENSSL_ia32cap_P");
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user