fix AVX & AVX2 detection
fixes issue #790 which resulted in a SIGILL on OpenBSD code is mostly from libwebp, based on the following: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family Change-Id: Ida7c1a18261e98c05ed9c662068140be407ec107
This commit is contained in:
parent
4e04fa6dea
commit
7e515c4637
@ -13,6 +13,7 @@
|
|||||||
#define VPX_PORTS_X86_H_
|
#define VPX_PORTS_X86_H_
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "vpx_config.h"
|
#include "vpx_config.h"
|
||||||
|
#include "vpx/vpx_integer.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -104,6 +105,37 @@ void __cpuid(int CPUInfo[4], int info_type);
|
|||||||
#endif
|
#endif
|
||||||
#endif /* end others */
|
#endif /* end others */
|
||||||
|
|
||||||
|
// NaCl has no support for xgetbv or the raw opcode.
|
||||||
|
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
|
||||||
|
static INLINE uint64_t xgetbv(void) {
|
||||||
|
const uint32_t ecx = 0;
|
||||||
|
uint32_t eax, edx;
|
||||||
|
// Use the raw opcode for xgetbv for compatibility with older toolchains.
|
||||||
|
__asm__ volatile (
|
||||||
|
".byte 0x0f, 0x01, 0xd0\n"
|
||||||
|
: "=a"(eax), "=d"(edx) : "c" (ecx));
|
||||||
|
return ((uint64_t)edx << 32) | eax;
|
||||||
|
}
|
||||||
|
#elif (defined(_M_X64) || defined(_M_IX86)) && \
|
||||||
|
defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
|
||||||
|
#include <immintrin.h>
|
||||||
|
#define xgetbv() _xgetbv(0)
|
||||||
|
#elif defined(_MSC_VER) && defined(_M_IX86)
|
||||||
|
static INLINE uint64_t xgetbv(void) {
|
||||||
|
uint32_t eax_, edx_;
|
||||||
|
__asm {
|
||||||
|
xor ecx, ecx // ecx = 0
|
||||||
|
// Use the raw opcode for xgetbv for compatibility with older toolchains.
|
||||||
|
__asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
|
||||||
|
mov eax_, eax
|
||||||
|
mov edx_, edx
|
||||||
|
}
|
||||||
|
return ((uint64_t)edx_ << 32) | eax_;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
|
||||||
|
#endif
|
||||||
|
|
||||||
#define HAS_MMX 0x01
|
#define HAS_MMX 0x01
|
||||||
#define HAS_SSE 0x02
|
#define HAS_SSE 0x02
|
||||||
#define HAS_SSE2 0x04
|
#define HAS_SSE2 0x04
|
||||||
@ -156,14 +188,17 @@ x86_simd_caps(void) {
|
|||||||
|
|
||||||
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
|
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
|
||||||
|
|
||||||
if (reg_ecx & BIT(28)) flags |= HAS_AVX;
|
// bits 27 (OSXSAVE) & 28 (256-bit AVX)
|
||||||
|
if (reg_ecx & (BIT(27) | BIT(28))) {
|
||||||
|
if ((xgetbv() & 0x6) == 0x6) {
|
||||||
|
flags |= HAS_AVX;
|
||||||
|
|
||||||
/* Get the leaf 7 feature flags. Needed to check for AVX2 support */
|
/* Get the leaf 7 feature flags. Needed to check for AVX2 support */
|
||||||
reg_eax = 7;
|
|
||||||
reg_ecx = 0;
|
|
||||||
cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
|
cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
|
||||||
|
|
||||||
if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
|
if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return flags & mask;
|
return flags & mask;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user