detection of other CPU features

This commit is contained in:
Ilya Lavrenov 2015-01-12 10:59:30 +03:00
parent 3a426660ea
commit bc394e7516
6 changed files with 157 additions and 27 deletions

View File

@ -214,12 +214,14 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov"
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" ON IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR IOS) )

View File

@ -122,16 +122,19 @@ if(CMAKE_COMPILER_IS_GNUCXX)
if(ENABLE_POWERPC)
add_extra_compiler_option("-mcpu=G3 -mtune=G5")
endif()
if(ENABLE_POPCNT)
add_extra_compiler_option(-mpopcnt)
endif()
if(ENABLE_SSE)
add_extra_compiler_option(-msse)
endif()
if(ENABLE_SSE2)
add_extra_compiler_option(-msse2)
endif()
if (ENABLE_NEON)
if(ENABLE_NEON)
add_extra_compiler_option("-mfpu=neon")
endif()
if (ENABLE_VFPV3 AND NOT ENABLE_NEON)
if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
add_extra_compiler_option("-mfpu=vfpv3")
endif()
@ -162,6 +165,10 @@ if(CMAKE_COMPILER_IS_GNUCXX)
add_extra_compiler_option(-msse4.2)
endif()
endif()
if(ENABLE_FMA3)
add_extra_compiler_option(-mfma)
endif()
endif(NOT MINGW)
if(X86 OR X86_64)

View File

@ -104,18 +104,32 @@
#endif
/* CPU features and intrinsics support */
#define CV_CPU_NONE 0
#define CV_CPU_MMX 1
#define CV_CPU_SSE 2
#define CV_CPU_SSE2 3
#define CV_CPU_SSE3 4
#define CV_CPU_SSSE3 5
#define CV_CPU_SSE4_1 6
#define CV_CPU_SSE4_2 7
#define CV_CPU_POPCNT 8
#define CV_CPU_AVX 10
#define CV_CPU_AVX2 11
#define CV_CPU_NEON 12
#define CV_CPU_NONE 0
#define CV_CPU_MMX 1
#define CV_CPU_SSE 2
#define CV_CPU_SSE2 3
#define CV_CPU_SSE3 4
#define CV_CPU_SSSE3 5
#define CV_CPU_SSE4_1 6
#define CV_CPU_SSE4_2 7
#define CV_CPU_POPCNT 8
#define CV_CPU_AVX 10
#define CV_CPU_AVX2 11
#define CV_CPU_FMA3 12
#define CV_CPU_AVX_512F 13
#define CV_CPU_AVX_512BW 14
#define CV_CPU_AVX_512CD 15
#define CV_CPU_AVX_512DQ 16
#define CV_CPU_AVX_512ER 17
#define CV_CPU_AVX_512IFMA512 18
#define CV_CPU_AVX_512PF 19
#define CV_CPU_AVX_512VBMI 20
#define CV_CPU_AVX_512VL 21
#define CV_CPU_NEON 100
// when adding to this list remember to update the enum in core/utility.cpp
#define CV_HARDWARE_MAX_FEATURE 255
@ -124,6 +138,7 @@
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
# include <emmintrin.h>
# define CV_MMX
# define CV_SSE 1
# define CV_SSE2 1
# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
@ -142,6 +157,14 @@
# include <nmmintrin.h>
# define CV_SSE4_2 1
# endif
# if defined __FMA__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include <immintrin.h>
# define CV_FMA3 1
# endif
# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include <popcntintrin.h>
# define CV_POPCNT 1
# endif
# if defined __AVX__ || defined __AVX2__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
@ -151,11 +174,12 @@
# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
# else
# define __xgetbv() 0
# ifdef __AVX2__
# define CV_AVX2 1
# endif
# endif
# endif
# if defined __AVX2__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
# include <immintrin.h>
# define CV_AVX2 1
# endif
#endif
#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
@ -170,6 +194,9 @@
#endif // __CUDACC__
#ifndef CV_MMX
# define CV_MMX 0
#endif
#ifndef CV_SSE
# define CV_SSE 0
#endif
@ -194,6 +221,40 @@
#ifndef CV_AVX2
# define CV_AVX2 0
#endif
#ifndef CV_POPCNT
#define CV_POPCNT 0
#endif
#ifndef CV_FMA3
# define CV_FMA3 0
#endif
#ifndef CV_AVX_512F
# define CV_AVX_512F 0
#endif
#ifndef CV_AVX_512BW
# define CV_AVX_512BW 0
#endif
#ifndef CV_AVX_512CD
# define CV_AVX_512CD 0
#endif
#ifndef CV_AVX_512DQ
# define CV_AVX_512DQ 0
#endif
#ifndef CV_AVX_512ER
# define CV_AVX_512ER 0
#endif
#ifndef CV_AVX_512IFMA512
# define CV_AVX_512IFMA512 0
#endif
#ifndef CV_AVX_512PF
# define CV_AVX_512PF 0
#endif
#ifndef CV_AVX_512VBMI
# define CV_AVX_512VBMI 0
#endif
#ifndef CV_AVX_512VL
# define CV_AVX_512VL 0
#endif
#ifndef CV_NEON
# define CV_NEON 0
#endif

View File

@ -281,16 +281,30 @@ CV_EXPORTS_W int64 getCPUTickCount();
remember to keep this list identical to the one in cvdef.h
*/
enum CpuFeatures {
CPU_MMX = 1,
CPU_SSE = 2,
CPU_SSE2 = 3,
CPU_SSE3 = 4,
CPU_SSSE3 = 5,
CPU_SSE4_1 = 6,
CPU_SSE4_2 = 7,
CPU_POPCNT = 8,
CPU_AVX = 10,
CPU_NEON = 11
CPU_MMX = 1,
CPU_SSE = 2,
CPU_SSE2 = 3,
CPU_SSE3 = 4,
CPU_SSSE3 = 5,
CPU_SSE4_1 = 6,
CPU_SSE4_2 = 7,
CPU_POPCNT = 8,
CPU_AVX = 10,
CPU_AVX2 = 11,
CPU_FMA3 = 12,
CPU_AVX_512F = 13,
CPU_AVX_512BW = 14,
CPU_AVX_512CD = 15,
CPU_AVX_512DQ = 16,
CPU_AVX_512ER = 17,
CPU_AVX_512IFMA512 = 18,
CPU_AVX_512PF = 19,
CPU_AVX_512VBMI = 20,
CPU_AVX_512VL = 21,
CPU_NEON = 100
};
/** @brief Returns true if the specified feature is supported by the host hardware.

View File

@ -263,6 +263,7 @@ struct HWFeatures
f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
@ -301,6 +302,15 @@ struct HWFeatures
#endif
f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
}
return f;

View File

@ -2998,6 +2998,12 @@ void printVersionInfo(bool useStdOut)
std::string cpu_features;
#if CV_MMX
if (checkHardwareSupport(CV_CPU_MMX)) cpu_features += " mmx";
#endif
#if CV_POPCNT
if (checkHardwareSupport(CV_CPU_POPCNT)) cpu_features += " popcnt";
#endif
#if CV_SSE
if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse";
#endif
@ -3022,6 +3028,36 @@ void printVersionInfo(bool useStdOut)
#if CV_AVX2
if (checkHardwareSupport(CV_CPU_AVX2)) cpu_features += " avx2";
#endif
#if CV_FMA3
if (checkHardwareSupport(CV_CPU_FMA3)) cpu_features += " fma3";
#endif
#if CV_AVX_512F
if (checkHardwareSupport(CV_CPU_AVX_512F) cpu_features += " avx-512f";
#endif
#if CV_AVX_512BW
if (checkHardwareSupport(CV_CPU_AVX_512BW) cpu_features += " avx-512bw";
#endif
#if CV_AVX_512CD
if (checkHardwareSupport(CV_CPU_AVX_512CD) cpu_features += " avx-512cd";
#endif
#if CV_AVX_512DQ
if (checkHardwareSupport(CV_CPU_AVX_512DQ) cpu_features += " avx-512dq";
#endif
#if CV_AVX_512ER
if (checkHardwareSupport(CV_CPU_AVX_512ER) cpu_features += " avx-512er";
#endif
#if CV_AVX_512IFMA512
if (checkHardwareSupport(CV_CPU_AVX_512IFMA512) cpu_features += " avx-512ifma512";
#endif
#if CV_AVX_512PF
if (checkHardwareSupport(CV_CPU_AVX_512PF) cpu_features += " avx-512pf";
#endif
#if CV_AVX_512VBMI
if (checkHardwareSupport(CV_CPU_AVX_512VBMI) cpu_features += " avx-512vbmi";
#endif
#if CV_AVX_512VL
if (checkHardwareSupport(CV_CPU_AVX_512VL) cpu_features += " avx-512vl";
#endif
#if CV_NEON
cpu_features += " neon"; // NEON is currently not checked at runtime
#endif