Extend OPENSSL_ia32cap_P (backport from HEAD).
This commit is contained in:
		| @@ -665,7 +665,7 @@ const char *CRYPTO_get_lock_name(int type) | |||||||
| 	defined(__INTEL__) || \ | 	defined(__INTEL__) || \ | ||||||
| 	defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) | 	defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) | ||||||
|  |  | ||||||
| unsigned int  OPENSSL_ia32cap_P[2]; | extern unsigned int  OPENSSL_ia32cap_P[4]; | ||||||
| unsigned long *OPENSSL_ia32cap_loc(void) | unsigned long *OPENSSL_ia32cap_loc(void) | ||||||
| {   if (sizeof(long)==4) | {   if (sizeof(long)==4) | ||||||
| 	/* | 	/* | ||||||
| @@ -674,6 +674,9 @@ unsigned long *OPENSSL_ia32cap_loc(void) | |||||||
| 	 * is 32-bit. | 	 * is 32-bit. | ||||||
| 	 */ | 	 */ | ||||||
| 	OPENSSL_ia32cap_P[1]=0; | 	OPENSSL_ia32cap_P[1]=0; | ||||||
|  |  | ||||||
|  |     OPENSSL_ia32cap_P[2]=0; | ||||||
|  |  | ||||||
|     return (unsigned long *)OPENSSL_ia32cap_P; |     return (unsigned long *)OPENSSL_ia32cap_P; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -686,7 +689,7 @@ typedef unsigned long long IA32CAP; | |||||||
| #endif | #endif | ||||||
| void OPENSSL_cpuid_setup(void) | void OPENSSL_cpuid_setup(void) | ||||||
| { static int trigger=0; | { static int trigger=0; | ||||||
|   IA32CAP OPENSSL_ia32_cpuid(void); |   IA32CAP OPENSSL_ia32_cpuid(unsigned int *); | ||||||
|   IA32CAP vec; |   IA32CAP vec; | ||||||
|   char *env; |   char *env; | ||||||
|  |  | ||||||
| @@ -700,10 +703,21 @@ void OPENSSL_cpuid_setup(void) | |||||||
| #else | #else | ||||||
| 	if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0); | 	if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0); | ||||||
| #endif | #endif | ||||||
| 	if (off) vec = OPENSSL_ia32_cpuid()&~vec; | 	if (off) vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P)&~vec; | ||||||
|  | 	else if (env[0]==':') vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); | ||||||
|  |  | ||||||
|  | 	OPENSSL_ia32cap_P[2] = 0; | ||||||
|  | 	if ((env=strchr(env,':'))) { | ||||||
|  | 	    unsigned int vecx; | ||||||
|  | 	    env++; | ||||||
|  | 	    off = (env[0]=='~')?1:0; | ||||||
|  | 	    vecx = strtoul(env+off,NULL,0); | ||||||
|  | 	    if (off)	OPENSSL_ia32cap_P[2] &= ~vecx; | ||||||
|  | 	    else	OPENSSL_ia32cap_P[2] = vecx; | ||||||
|  | 	} | ||||||
|     } |     } | ||||||
|     else |     else | ||||||
| 	vec = OPENSSL_ia32_cpuid(); | 	vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); | ||||||
|  |  | ||||||
|     /* |     /* | ||||||
|      * |(1<<10) sets a reserved bit to signal that variable |      * |(1<<10) sets a reserved bit to signal that variable | ||||||
| @@ -713,6 +727,8 @@ void OPENSSL_cpuid_setup(void) | |||||||
|     OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10); |     OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10); | ||||||
|     OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); |     OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); | ||||||
| } | } | ||||||
|  | #else | ||||||
|  | unsigned int OPENSSL_ia32cap_P[4]; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| #else | #else | ||||||
|   | |||||||
| @@ -24,7 +24,7 @@ print<<___; | |||||||
| 	call	OPENSSL_cpuid_setup | 	call	OPENSSL_cpuid_setup | ||||||
|  |  | ||||||
| .hidden	OPENSSL_ia32cap_P | .hidden	OPENSSL_ia32cap_P | ||||||
| .comm	OPENSSL_ia32cap_P,8,4 | .comm	OPENSSL_ia32cap_P,16,4 | ||||||
|  |  | ||||||
| .text | .text | ||||||
|  |  | ||||||
| @@ -53,12 +53,13 @@ OPENSSL_rdtsc: | |||||||
| .size	OPENSSL_rdtsc,.-OPENSSL_rdtsc | .size	OPENSSL_rdtsc,.-OPENSSL_rdtsc | ||||||
|  |  | ||||||
| .globl	OPENSSL_ia32_cpuid | .globl	OPENSSL_ia32_cpuid | ||||||
| .type	OPENSSL_ia32_cpuid,\@abi-omnipotent | .type	OPENSSL_ia32_cpuid,\@function,1 | ||||||
| .align	16 | .align	16 | ||||||
| OPENSSL_ia32_cpuid: | OPENSSL_ia32_cpuid: | ||||||
| 	mov	%rbx,%r8		# save %rbx | 	mov	%rbx,%r8		# save %rbx | ||||||
|  |  | ||||||
| 	xor	%eax,%eax | 	xor	%eax,%eax | ||||||
|  | 	mov	%eax,8(%rdi)		# clear 3rd word | ||||||
| 	cpuid | 	cpuid | ||||||
| 	mov	%eax,%r11d		# max value for standard query level | 	mov	%eax,%r11d		# max value for standard query level | ||||||
|  |  | ||||||
| @@ -126,6 +127,14 @@ OPENSSL_ia32_cpuid: | |||||||
| 	shr	\$14,%r10d | 	shr	\$14,%r10d | ||||||
| 	and	\$0xfff,%r10d		# number of cores -1 per L1D | 	and	\$0xfff,%r10d		# number of cores -1 per L1D | ||||||
|  |  | ||||||
|  | 	cmp	\$7,%r11d | ||||||
|  | 	jb	.Lnocacheinfo | ||||||
|  |  | ||||||
|  | 	mov	\$7,%eax | ||||||
|  | 	xor	%ecx,%ecx | ||||||
|  | 	cpuid | ||||||
|  | 	mov	%ebx,8(%rdi) | ||||||
|  |  | ||||||
| .Lnocacheinfo: | .Lnocacheinfo: | ||||||
| 	mov	\$1,%eax | 	mov	\$1,%eax | ||||||
| 	cpuid | 	cpuid | ||||||
| @@ -165,6 +174,7 @@ OPENSSL_ia32_cpuid: | |||||||
| .Lclear_avx: | .Lclear_avx: | ||||||
| 	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11) | 	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11) | ||||||
| 	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits | 	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits | ||||||
|  | 	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5) | ||||||
| .Ldone: | .Ldone: | ||||||
| 	shl	\$32,%r9 | 	shl	\$32,%r9 | ||||||
| 	mov	%r10d,%eax | 	mov	%r10d,%eax | ||||||
|   | |||||||
| @@ -22,6 +22,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||||||
| 	&xor	("eax","eax"); | 	&xor	("eax","eax"); | ||||||
| 	&bt	("ecx",21); | 	&bt	("ecx",21); | ||||||
| 	&jnc	(&label("nocpuid")); | 	&jnc	(&label("nocpuid")); | ||||||
|  | 	&mov	("esi",&wparam(0)); | ||||||
|  | 	&mov	(&DWP(8,"esi"),"eax");	# clear 3rd word | ||||||
| 	&cpuid	(); | 	&cpuid	(); | ||||||
| 	&mov	("edi","eax");		# max value for standard query level | 	&mov	("edi","eax");		# max value for standard query level | ||||||
|  |  | ||||||
| @@ -79,6 +81,16 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||||||
| 	&jmp	(&label("generic")); | 	&jmp	(&label("generic")); | ||||||
| 	 | 	 | ||||||
| &set_label("intel"); | &set_label("intel"); | ||||||
|  | 	&cmp	("edi",7); | ||||||
|  | 	&jb	(&label("cacheinfo")); | ||||||
|  |  | ||||||
|  | 	&mov	("esi",&wparam(0)); | ||||||
|  | 	&mov	("eax",7); | ||||||
|  | 	&xor	("ecx","ecx"); | ||||||
|  | 	&cpuid	(); | ||||||
|  | 	&mov	(&DWP(8,"esi"),"ebx"); | ||||||
|  |  | ||||||
|  | &set_label("cacheinfo"); | ||||||
| 	&cmp	("edi",4); | 	&cmp	("edi",4); | ||||||
| 	&mov	("edi",-1); | 	&mov	("edi",-1); | ||||||
| 	&jb	(&label("nocacheinfo")); | 	&jb	(&label("nocacheinfo")); | ||||||
| @@ -135,6 +147,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||||||
| 	&and	("esi",0xfeffffff);	# clear FXSR | 	&and	("esi",0xfeffffff);	# clear FXSR | ||||||
| &set_label("clear_avx"); | &set_label("clear_avx"); | ||||||
| 	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits | 	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits | ||||||
|  | 	&mov	("edi",&wparam(0)); | ||||||
|  | 	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2 | ||||||
| &set_label("done"); | &set_label("done"); | ||||||
| 	&mov	("eax","esi"); | 	&mov	("eax","esi"); | ||||||
| 	&mov	("edx","ebp"); | 	&mov	("edx","ebp"); | ||||||
| @@ -198,7 +212,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||||||
|  |  | ||||||
| &function_begin_B("OPENSSL_far_spin"); | &function_begin_B("OPENSSL_far_spin"); | ||||||
| 	&pushf	(); | 	&pushf	(); | ||||||
| 	&pop	("eax") | 	&pop	("eax"); | ||||||
| 	&bt	("eax",9); | 	&bt	("eax",9); | ||||||
| 	&jnc	(&label("nospin"));	# interrupts are disabled | 	&jnc	(&label("nospin"));	# interrupts are disabled | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Andy Polyakov
					Andy Polyakov