9f26861147
This CL fixes an oversight in the AVX2 support CL previously merged (Change-Id: Idc03f3fca4bf2d0afd33631ea1d3caf8fc34ec29) that prevented runtime execution of AVX2 code in WebM. Background: Starting with the Sandy Bridge processor, the CPUID instruction was enhanced to add various extended feature flag enumeration leaves. Reading these leaves requires an additional input value for the CPUID instruction, which is stored in ECX. This change adds this second input value to the CPUID macros for all ARCH_X86 and ARCH_X86_64 targets, allowing checks of EBX bit 5 for AVX2 support. This capability will be required moving forward to check for future processor features. Change-Id: Ie9d872bc9ff68dad4b6578e4544e4dfd0ae26c36
251 lines
6.2 KiB
C
251 lines
6.2 KiB
C
/*
|
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
|
|
#ifndef VPX_PORTS_X86_H
|
|
#define VPX_PORTS_X86_H
|
|
#include <stdlib.h>
|
|
#include "vpx_config.h"
|
|
|
|
/*
 * CPU vendor identifiers.
 *
 * VPX_CPU_UNKNOWN is the only negative value; the named vendors are
 * numbered consecutively from 0, and VPX_CPU_LAST is one past the final
 * vendor (i.e. it equals the number of named vendors).
 */
typedef enum {
  VPX_CPU_UNKNOWN = -1,  /* vendor could not be identified */

  VPX_CPU_AMD,
  VPX_CPU_AMD_OLD,
  VPX_CPU_CENTAUR,
  VPX_CPU_CYRIX,
  VPX_CPU_INTEL,
  VPX_CPU_NEXGEN,
  VPX_CPU_NSC,
  VPX_CPU_RISE,
  VPX_CPU_SIS,
  VPX_CPU_TRANSMETA,
  VPX_CPU_TRANSMETA_OLD,
  VPX_CPU_UMC,
  VPX_CPU_VIA,

  VPX_CPU_LAST           /* sentinel: keep as the final enumerator */
} vpx_cpu_t;
|
|
|
|
/*
 * cpuid(func, func2, ax, bx, cx, dx)
 *
 * Executes the CPUID instruction with EAX = func (the leaf) and
 * ECX = func2 (the sub-leaf), then stores the resulting EAX, EBX, ECX and
 * EDX values into the lvalues ax, bx, cx and dx respectively.
 *
 * One definition is selected per compiler family (GCC-compatible, Sun
 * Studio, otherwise assumed to be MSVC) and per word size (ARCH_X86_64).
 */
#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
#if ARCH_X86_64
#define cpuid(func, func2, ax, bx, cx, dx)\
  __asm__ __volatile__ (\
                        "cpuid \n\t" \
                        : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
                        : "a" (func), "c" (func2));
#else
/*
 * 32-bit: EBX is saved in EDI around the instruction and the CPUID EBX
 * result is handed back through EDI, so EBX itself is left untouched
 * (presumably because EBX may be reserved, e.g. as the PIC base register).
 */
#define cpuid(func, func2, ax, bx, cx, dx)\
  __asm__ __volatile__ (\
                        "mov %%ebx, %%edi \n\t" \
                        "cpuid \n\t" \
                        "xchg %%edi, %%ebx \n\t" \
                        : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                        : "a" (func), "c" (func2));
#endif
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if ARCH_X86_64
/*
 * RBX is parked in RSI while CPUID runs and restored afterwards; the EBX
 * result is returned through EDI.
 * NOTE(review): after the final xchg RSI holds the CPUID EBX value, yet RSI
 * is neither an output nor a declared clobber -- confirm this is safe.
 */
#define cpuid(func, func2, ax, bx, cx, dx)\
  asm volatile (\
                "xchg %rsi, %rbx \n\t" \
                "cpuid \n\t" \
                "movl %ebx, %edi \n\t" \
                "xchg %rsi, %rbx \n\t" \
                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func), "c" (func2));
#else
/* EBX is preserved on the stack; the EBX result is returned through EDI. */
#define cpuid(func, func2, ax, bx, cx, dx)\
  asm volatile (\
                "pushl %ebx \n\t" \
                "cpuid \n\t" \
                "movl %ebx, %edi \n\t" \
                "popl %ebx \n\t" \
                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func), "c" (func2));
#endif
#else /* end __SUNPRO__ */
#if ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
/*
 * __cpuidex is the intrinsic that accepts a sub-leaf (ECX) value; the plain
 * __cpuid intrinsic only takes the leaf.
 */
void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
#pragma intrinsic(__cpuidex)
#define cpuid(func, func2, a, b, c, d) do {\
    int regs[4];\
    __cpuidex(regs, func, func2);\
    a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\
  } while(0)
#else
/*
 * Fallback for compilers without __cpuidex: func2 cannot be passed, so
 * results for leaves that depend on the sub-leaf are not reliable here.
 */
void __cpuid(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuid)
#define cpuid(func, func2, a, b, c, d) do {\
    int regs[4];\
    __cpuid(regs, func);\
    a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\
  } while(0)
#endif
#else
#define cpuid(func, func2, a, b, c, d)\
  __asm mov eax, func\
  __asm mov ecx, func2\
  __asm cpuid\
  __asm mov a, eax\
  __asm mov b, ebx\
  __asm mov c, ecx\
  __asm mov d, edx
#endif
#endif /* end others */
|
|
|
|
/*
 * SIMD capability flags. Each flag occupies its own bit so that several of
 * them can be OR-ed together into a single capability mask.
 */
#define HAS_MMX     0x01
#define HAS_SSE     0x02
#define HAS_SSE2    0x04
#define HAS_SSE3    0x08
#define HAS_SSSE3   0x10
#define HAS_SSE4_1  0x20
#define HAS_AVX     0x40
#define HAS_AVX2    0x80
|
|
/*
 * BIT(n): an int with only bit n set. Only defined if no BIT macro exists
 * yet. The argument is parenthesized so that expressions with operators of
 * lower precedence than << (e.g. BIT(a | b)) expand as intended.
 * n must be in the range 0..30, since the shifted constant is a signed int.
 */
#ifndef BIT
#define BIT(n) (1 << (n))
#endif
|
|
|
|
static int
|
|
x86_simd_caps(void) {
|
|
unsigned int flags = 0;
|
|
unsigned int mask = ~0;
|
|
unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
|
|
char *env;
|
|
(void)reg_ebx;
|
|
|
|
/* See if the CPU capabilities are being overridden by the environment */
|
|
env = getenv("VPX_SIMD_CAPS");
|
|
|
|
if (env && *env)
|
|
return (int)strtol(env, NULL, 0);
|
|
|
|
env = getenv("VPX_SIMD_CAPS_MASK");
|
|
|
|
if (env && *env)
|
|
mask = strtol(env, NULL, 0);
|
|
|
|
/* Ensure that the CPUID instruction supports extended features */
|
|
cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
|
|
|
|
if (reg_eax < 1)
|
|
return 0;
|
|
|
|
/* Get the standard feature flags */
|
|
cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
|
|
|
|
if (reg_edx & BIT(23)) flags |= HAS_MMX;
|
|
|
|
if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
|
|
|
|
if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
|
|
|
|
if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
|
|
|
|
if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
|
|
|
|
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
|
|
|
|
if (reg_ecx & BIT(28)) flags |= HAS_AVX;
|
|
|
|
/* Get the leaf 7 feature flags. Needed to check for AVX2 support */
|
|
reg_eax = 7;
|
|
reg_ecx = 0;
|
|
cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
|
|
|
|
if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
|
|
|
|
return flags & mask;
|
|
}
|
|
|
|
/*
 * Declared here, defined outside this header. Returns a vpx_cpu_t value;
 * presumably the vendor of the CPU the code is running on -- confirm
 * against the implementation.
 */
vpx_cpu_t vpx_x86_vendor(void);
|
|
|
|
#if ARCH_X86_64 && defined(_MSC_VER)
unsigned __int64 __rdtsc(void);
#pragma intrinsic(__rdtsc)
#endif

/*
 * Reads the time-stamp counter with the RDTSC instruction and returns the
 * value delivered in EAX (the low 32 bits of the counter).
 *
 * RDTSC also writes EDX. The inline-asm variants therefore bind EDX to a
 * dummy output so the compiler knows the register is overwritten; the
 * value itself is discarded.
 */
static unsigned int
x86_readtsc(void) {
#if defined(__GNUC__) && __GNUC__
  unsigned int tsc, tsc_hi;
  __asm__ __volatile__("rdtsc\n\t":"=a"(tsc), "=d"(tsc_hi):);
  (void)tsc_hi;
  return tsc;
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
  unsigned int tsc, tsc_hi;
  asm volatile("rdtsc\n\t":"=a"(tsc), "=d"(tsc_hi):);
  (void)tsc_hi;
  return tsc;
#else
#if ARCH_X86_64
  return (unsigned int)__rdtsc();
#else
  /* No explicit return: the result is left in EAX by the instruction. */
  __asm rdtsc;
#endif
#endif
}
|
|
|
|
|
|
/*
 * x86_pause_hint()
 *
 * Issues the PAUSE instruction. Every variant expands to a single
 * expression/statement WITHOUT a trailing semicolon, so the macro is used
 * as `x86_pause_hint();` and is safe in unbraced if/else bodies.
 *
 * NOTE(review): the _mm_pause() variant relies on the intrinsic being
 * declared by a header included elsewhere -- confirm.
 */
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint()\
  __asm__ __volatile__ ("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint()\
  asm volatile ("pause \n\t")
#else
#if ARCH_X86_64
#define x86_pause_hint()\
  _mm_pause()
#else
#define x86_pause_hint()\
  __asm pause
#endif
#endif
|
|
|
|
/*
 * x87 FPU control word accessors.
 *
 *   x87_set_control_word(mode)  loads `mode` with FLDCW.
 *   x87_get_control_word()      stores the current word with FSTCW and
 *                               returns it.
 *
 * One implementation is provided per compiler family; on the remaining
 * (non-GCC, non-Sun) compilers the 64-bit build maps both names onto
 * external assembly routines instead of using inline assembly.
 */
#if defined(__GNUC__) && __GNUC__

static void
x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}

static unsigned short
x87_get_control_word(void) {
  unsigned short current_cw;

  __asm__ __volatile__("fstcw %0\n\t":"=m"(*&current_cw):);
  return current_cw;
}

#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)

static void
x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(*&mode));
}

static unsigned short
x87_get_control_word(void) {
  unsigned short current_cw;

  asm volatile("fstcw %0\n\t":"=m"(*&current_cw):);
  return current_cw;
}

#elif ARCH_X86_64

/* No fldcw intrinsics on Windows x64, punt to external asm */
extern void           vpx_winx64_fldcw(unsigned short mode);
extern unsigned short vpx_winx64_fstcw(void);
#define x87_set_control_word vpx_winx64_fldcw
#define x87_get_control_word vpx_winx64_fstcw

#else

static void
x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}

static unsigned short
x87_get_control_word(void) {
  unsigned short current_cw;

  __asm { fstcw current_cw }
  return current_cw;
}

#endif
|
|
|
|
/*
 * Rewrites bits 8-9 of the x87 control word to the pattern 10b (mask 0x300,
 * value 0x200) while leaving every other bit as it was.
 *
 * Returns the control word that was in effect before the change, e.g. so
 * it can later be handed back to x87_set_control_word().
 */
static unsigned short
x87_set_double_precision(void) {
  const unsigned short previous_cw = x87_get_control_word();
  unsigned short updated_cw = previous_cw;

  updated_cw &= ~0x300;  /* clear bits 8-9 */
  updated_cw |= 0x200;   /* set them to 10b */
  x87_set_control_word(updated_cw);

  return previous_cw;
}
|
|
|
|
|
|
/*
 * External routine defined outside this header; takes no arguments and
 * returns nothing. Presumably clears the MMX state so that x87 floating
 * point code can run afterwards -- confirm against the implementation.
 */
extern void vpx_reset_mmx_state(void);
|
|
#endif
|
|
|