x86cpuid.pl update [from HEAD].

This commit is contained in:
Andy Polyakov 2007-11-11 19:44:42 +00:00
parent 11eb172b6e
commit 095db72024
2 changed files with 32 additions and 41 deletions

View File

@ -541,50 +541,13 @@ sub main'set_label
sub main'file_end
{
# try to detect if SSE2 or MMX extensions were used on ELF platform...
if ($main'elf && grep {/%[x]*mm[0-7]/i} @out) {
if ($main'elf && grep {/\b%[x]*mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) {
local($tmp);
push (@out,"\n.section\t.bss\n");
push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n");
push (@out,".section\t.init\n");
# One can argue that it's wasteful to craft every
# SSE/MMX module with this snippet... Well, it's 72
# bytes long and for the moment we have two modules.
# Let's argue when we have 7 modules or so...
#
# $1<<10 sets a reserved bit to signal that variable
# was initialized already...
&main'picmeup("edx","OPENSSL_ia32cap_P");
$tmp=<<___;
cmpl \$0,(%edx)
jne 1f
movl \$1<<10,(%edx)
pushf
popl %eax
movl %eax,%ecx
xorl \$1<<21,%eax
pushl %eax
popf
pushf
popl %eax
xorl %ecx,%eax
btl \$21,%eax
jnc 1f
pushl %edi
pushl %ebx
movl %edx,%edi
movl \$1,%eax
.byte 0x0f,0xa2
orl \$1<<10,%edx
movl %edx,0(%edi)
popl %ebx
popl %edi
jmp 1f
.align $align
1:
___
push (@out,$tmp);
return;
}
if ($const ne "")

View File

@ -19,13 +19,41 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&pop ("eax");
&xor ("ecx","eax");
&bt ("ecx",21);
&jnc (&label("nocpuid"));
&jnc (&label("done"));
&xor ("eax","eax");
&cpuid ();
&xor ("eax","eax");
&cmp ("ebx",0x756e6547); # "Genu"
&data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
&mov ("ebp","eax");
&cmp ("edx",0x49656e69); # "ineI"
&data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
&or ("ebp","eax");
&cmp ("ecx",0x6c65746e); # "ntel"
&data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
&or ("ebp","eax");
&mov ("eax",1);
&cpuid ();
&set_label("nocpuid");
&cmp ("ebp",0);
&jne (&label("notP4"));
&and ("eax",15<<8); # family ID
&cmp ("eax",15<<8); # P4?
&jne (&label("notP4"));
&or ("edx",1<<20); # use reserved bit to engage RC4_CHAR
&set_label("notP4");
&bt ("edx",28); # test hyper-threading bit
&jnc (&label("done"));
&shr ("ebx",16);
&and ("ebx",0xff);
&cmp ("ebx",1); # see if cache is shared(*)
&ja (&label("done"));
&and ("edx",0xefffffff); # clear hyper-threading bit if not
&set_label("done");
&mov ("eax","edx");
&mov ("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
# (*) on Core2 this value is set to 2 denoting the fact that L2
# cache is shared between cores.
&external_label("OPENSSL_ia32cap_P");