Update x86cpuid.pl to correctly detect shared cache and to support new

RC4_set_key.
This commit is contained in:
Andy Polyakov 2007-04-01 17:28:08 +00:00
parent 2875462425
commit 162f677def
2 changed files with 46 additions and 15 deletions

View File

@ -20,12 +20,36 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&xor ("ecx","eax"); &xor ("ecx","eax");
&bt ("ecx",21); &bt ("ecx",21);
&jnc (&label("nocpuid")); &jnc (&label("nocpuid"));
&xor ("eax","eax");
&cpuid ();
&xor ("eax","eax");
&cmp ("ebx",0x756e6547); # "Genu"
&setne (&LB("eax"));
&mov ("ebp","eax");
&cmp ("edx",0x49656e69); # "ineI"
&setne (&LB("eax"));
&or ("ebp","eax");
&cmp ("ecx",0x6c65746e); # "ntel"
&setne (&LB("eax"));
&or ("ebp","eax");
&mov ("eax",1); &mov ("eax",1);
&cpuid (); &cpuid ();
&bt ("edx",28); # test hyper-threading bit
&jnc (&label("nocpuid"));
&cmp ("ebp",0);
&jne (&label("notintel"));
&or ("edx",1<<20); # use reserved bit to engage RC4_CHAR
&set_label("notintel");
&shr ("ebx",16);
&cmp (&LB("ebx"),1); # see if cache is shared(*)
&ja (&label("nocpuid"));
&and ("edx",~(1<<28)); # clear hyper-threading bit if not
&set_label("nocpuid"); &set_label("nocpuid");
&mov ("eax","edx"); &mov ("eax","edx");
&mov ("edx","ecx"); &mov ("edx","ecx");
&function_end("OPENSSL_ia32_cpuid"); &function_end("OPENSSL_ia32_cpuid");
# (*) on Core2 this value is set to 2 denoting the fact that L2
# cache is shared between cores.
&external_label("OPENSSL_ia32cap_P"); &external_label("OPENSSL_ia32cap_P");

View File

@ -17,20 +17,27 @@ register after executing CPUID instruction with EAX=1 input value (see
Intel Application Note #241618). Naturally it's meaningful on IA-32[E] Intel Application Note #241618). Naturally it's meaningful on IA-32[E]
platforms only. The variable is normally set up automatically upon platforms only. The variable is normally set up automatically upon
toolkit initialization, but can be manipulated afterwards to modify toolkit initialization, but can be manipulated afterwards to modify
crypto library behaviour. At the time of this writing five bits are crypto library behaviour. At the time of this writing six bits are
significant, namely bit #28 denoting Hyperthreading, which is used to significant, namely:
distinguish Intel P4 core, bit #26 denoting SSE2 support, bit #25
denoting SSE support, bit #23 denoting MMX support, and bit #4 denoting 1. bit #28 denoting Hyperthreading, which is used to distinguish
presence of Time-Stamp Counter. Clearing bit #26 at run-time for cores with shared cache;
example disables high-performance SSE2 code present in the crypto 2. bit #26 denoting SSE2 support;
library. You might have to do this if target OpenSSL application is 3. bit #25 denoting SSE support;
executed on SSE2 capable CPU, but under control of OS which does not 4. bit #23 denoting MMX support;
support SSE2 extensions. Even though you can manipulate the value 5. bit #20, reserved by Intel, is used to choose between RC4 code
programmatically, you most likely will find it more appropriate to set paths;
up an environment variable with the same name prior to starting target 6. bit #4 denoting presence of Time-Stamp Counter.
application, e.g. 'env OPENSSL_ia32cap=0x12800010 apps/openssl', to
achieve same effect without modifying the application source code. For example, clearing bit #26 at run-time disables high-performance
Alternatively you can reconfigure the toolkit with no-sse2 option and SSE2 code present in the crypto library. You might have to do this if
recompile. target OpenSSL application is executed on SSE2 capable CPU, but under
control of OS which does not support SSE2 extensions. Even though you
can manipulate the value programmatically, you most likely will find it
more appropriate to set up an environment variable with the same name
prior to starting target application, e.g. on Intel P4 processor 'env
OPENSSL_ia32cap=0x12900010 apps/openssl', to achieve same effect
without modifying the application source code. Alternatively you can
reconfigure the toolkit with no-sse2 option and recompile.
=cut =cut