From bc394e7516194be88816cbc8cf7603d394f84433 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Mon, 12 Jan 2015 10:59:30 +0300
Subject: [PATCH] detection of other CPU features

---
Review notes (this free-form area is ignored when the patch is applied):

 * ENABLE_POPCNT and ENABLE_FMA3 now default to OFF, like the other
   post-SSE3 options. -mpopcnt / -mfma let GCC emit those instructions in
   ordinary code (and -mfma also turns on AVX), so a default-ON build would
   fault with an illegal instruction on CPUs that lack them.
 * CV_MMX is defined to 1. An empty definition makes the `#if CV_MMX` added
   to ts_func.cpp ill-formed.
 * Added the missing closing parenthesis to the nine AVX-512
   `if (checkHardwareSupport(...))` lines in ts_func.cpp.
 * The AVX-512VL test uses `1u<<31` so the shift does not reach the sign
   bit of int.
 * `#define CV_POPCNT 0` is spaced like its neighbours.
 * The header names after `#include` in cvdef.h were missing from the copy
   under review and were restored from the usual intrinsic headers.
   NOTE(review): <popcntintrin.h> is a GCC/Clang header - TODO confirm what
   the MSVC branch of that condition should include.
 * Line breaks and leading whitespace were reconstructed; use a
   whitespace-tolerant apply if a context line fails to match.
 * Every hunk keeps its original line counts, so the @@ headers and the
   diffstat below are unchanged.

 CMakeLists.txt                                |  2 +
 cmake/OpenCVCompilerOptions.cmake             | 11 ++-
 modules/core/include/opencv2/core/cvdef.h     | 91 ++++++++++++++++---
 modules/core/include/opencv2/core/utility.hpp | 34 +++++--
 modules/core/src/system.cpp                   | 10 ++
 modules/ts/src/ts_func.cpp                    | 36 ++++++++
 6 files changed, 157 insertions(+), 27 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b5648efd..d9bb04081 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -214,12 +214,14 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov"
 OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
 OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
 OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
+OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
+OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR IOS) )
diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index 831026fb5..3d1155c87 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -122,16 +122,19 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   if(ENABLE_POWERPC)
     add_extra_compiler_option("-mcpu=G3 -mtune=G5")
   endif()
+  if(ENABLE_POPCNT)
+    add_extra_compiler_option(-mpopcnt)
+  endif()
   if(ENABLE_SSE)
     add_extra_compiler_option(-msse)
   endif()
   if(ENABLE_SSE2)
     add_extra_compiler_option(-msse2)
   endif()
-  if (ENABLE_NEON)
+  if(ENABLE_NEON)
     add_extra_compiler_option("-mfpu=neon")
   endif()
-  if (ENABLE_VFPV3 AND NOT ENABLE_NEON)
+  if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
     add_extra_compiler_option("-mfpu=vfpv3")
   endif()
 
@@ -162,6 +165,10 @@ if(CMAKE_COMPILER_IS_GNUCXX)
         add_extra_compiler_option(-msse4.2)
       endif()
     endif()
+
+    if(ENABLE_FMA3)
+      add_extra_compiler_option(-mfma)
+    endif()
   endif(NOT MINGW)
 
   if(X86 OR X86_64)
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index c52cb021c..45146a39c 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -104,18 +104,32 @@
 #endif
 
 /* CPU features and intrinsics support */
-#define CV_CPU_NONE 0
-#define CV_CPU_MMX 1
-#define CV_CPU_SSE 2
-#define CV_CPU_SSE2 3
-#define CV_CPU_SSE3 4
-#define CV_CPU_SSSE3 5
-#define CV_CPU_SSE4_1 6
-#define CV_CPU_SSE4_2 7
-#define CV_CPU_POPCNT 8
-#define CV_CPU_AVX 10
-#define CV_CPU_AVX2 11
-#define CV_CPU_NEON 12
+#define CV_CPU_NONE             0
+#define CV_CPU_MMX              1
+#define CV_CPU_SSE              2
+#define CV_CPU_SSE2             3
+#define CV_CPU_SSE3             4
+#define CV_CPU_SSSE3            5
+#define CV_CPU_SSE4_1           6
+#define CV_CPU_SSE4_2           7
+#define CV_CPU_POPCNT           8
+
+#define CV_CPU_AVX              10
+#define CV_CPU_AVX2             11
+#define CV_CPU_FMA3             12
+
+#define CV_CPU_AVX_512F         13
+#define CV_CPU_AVX_512BW        14
+#define CV_CPU_AVX_512CD        15
+#define CV_CPU_AVX_512DQ        16
+#define CV_CPU_AVX_512ER        17
+#define CV_CPU_AVX_512IFMA512   18
+#define CV_CPU_AVX_512PF        19
+#define CV_CPU_AVX_512VBMI      20
+#define CV_CPU_AVX_512VL        21
+
+#define CV_CPU_NEON             100
+
 
 // when adding to this list remember to update the enum in core/utility.cpp
 #define CV_HARDWARE_MAX_FEATURE 255
@@ -124,6 +138,7 @@
 
 #if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
 #  include <emmintrin.h>
+#  define CV_MMX 1
 #  define CV_SSE 1
 #  define CV_SSE2 1
 #  if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
@@ -142,6 +157,14 @@
 #    include <nmmintrin.h>
 #    define CV_SSE4_2 1
 #  endif
+#  if defined __FMA__ || (defined _MSC_VER && _MSC_VER >= 1500)
+#    include <immintrin.h>
+#    define CV_FMA3 1
+#  endif
+#  if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
+#    include <popcntintrin.h>
+#    define CV_POPCNT 1
+#  endif
 #  if defined __AVX__ || defined __AVX2__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
 // MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
 // See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
@@ -151,11 +174,12 @@
 #      define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
 #    else
 #      define __xgetbv() 0
-#      ifdef __AVX2__
-#        define CV_AVX2 1
-#      endif
 #    endif
 #  endif
+#  if defined __AVX2__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
+#    include <immintrin.h>
+#    define CV_AVX2 1
+#  endif
 #endif
 
 #if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
@@ -170,6 +194,9 @@
 
 #endif // __CUDACC__
 
+#ifndef CV_MMX
+#  define CV_MMX 0
+#endif
 #ifndef CV_SSE
 #  define CV_SSE 0
 #endif
@@ -194,6 +221,40 @@
 #ifndef CV_AVX2
 #  define CV_AVX2 0
 #endif
+#ifndef CV_POPCNT
+#  define CV_POPCNT 0
+#endif
+#ifndef CV_FMA3
+#  define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+#  define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+#  define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+#  define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+#  define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+#  define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA512
+#  define CV_AVX_512IFMA512 0
+#endif
+#ifndef CV_AVX_512PF
+#  define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+#  define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+#  define CV_AVX_512VL 0
+#endif
+
 #ifndef CV_NEON
 #  define CV_NEON 0
 #endif
diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp
index 88989ef5c..fb8ccd88d 100644
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@@ -281,16 +281,30 @@ CV_EXPORTS_W int64 getCPUTickCount();
    remember to keep this list identical to the one in cvdef.h
 */
 enum CpuFeatures {
-    CPU_MMX = 1,
-    CPU_SSE = 2,
-    CPU_SSE2 = 3,
-    CPU_SSE3 = 4,
-    CPU_SSSE3 = 5,
-    CPU_SSE4_1 = 6,
-    CPU_SSE4_2 = 7,
-    CPU_POPCNT = 8,
-    CPU_AVX = 10,
-    CPU_NEON = 11
+    CPU_MMX             = 1,
+    CPU_SSE             = 2,
+    CPU_SSE2            = 3,
+    CPU_SSE3            = 4,
+    CPU_SSSE3           = 5,
+    CPU_SSE4_1          = 6,
+    CPU_SSE4_2          = 7,
+    CPU_POPCNT          = 8,
+
+    CPU_AVX             = 10,
+    CPU_AVX2            = 11,
+    CPU_FMA3            = 12,
+
+    CPU_AVX_512F        = 13,
+    CPU_AVX_512BW       = 14,
+    CPU_AVX_512CD       = 15,
+    CPU_AVX_512DQ       = 16,
+    CPU_AVX_512ER       = 17,
+    CPU_AVX_512IFMA512  = 18,
+    CPU_AVX_512PF       = 19,
+    CPU_AVX_512VBMI     = 20,
+    CPU_AVX_512VL       = 21,
+
+    CPU_NEON            = 100
 };
 
 /** @brief Returns true if the specified feature is supported by the host hardware.
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index 11bbab3a2..a7a6e98d4 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -263,6 +263,7 @@ struct HWFeatures
             f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
             f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
             f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
+            f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
             f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
             f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
             f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
@@ -301,6 +302,15 @@ struct HWFeatures
         #endif
             f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
 
+            f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
+            f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
+            f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
+            f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
+            f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
+            f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
+            f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
+            f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1u<<31)) != 0;
+            f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
         }
 
         return f;
diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp
index 53b62e74d..84a9233dd 100644
--- a/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@@ -2998,6 +2998,12 @@ void printVersionInfo(bool useStdOut)
 
     std::string cpu_features;
 
+#if CV_MMX
+    if (checkHardwareSupport(CV_CPU_MMX)) cpu_features += " mmx";
+#endif
+#if CV_POPCNT
+    if (checkHardwareSupport(CV_CPU_POPCNT)) cpu_features += " popcnt";
+#endif
 #if CV_SSE
     if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse";
 #endif
@@ -3022,6 +3028,36 @@ void printVersionInfo(bool useStdOut)
 #if CV_AVX2
     if (checkHardwareSupport(CV_CPU_AVX2)) cpu_features += " avx2";
 #endif
+#if CV_FMA3
+    if (checkHardwareSupport(CV_CPU_FMA3)) cpu_features += " fma3";
+#endif
+#if CV_AVX_512F
+    if (checkHardwareSupport(CV_CPU_AVX_512F)) cpu_features += " avx-512f";
+#endif
+#if CV_AVX_512BW
+    if (checkHardwareSupport(CV_CPU_AVX_512BW)) cpu_features += " avx-512bw";
+#endif
+#if CV_AVX_512CD
+    if (checkHardwareSupport(CV_CPU_AVX_512CD)) cpu_features += " avx-512cd";
+#endif
+#if CV_AVX_512DQ
+    if (checkHardwareSupport(CV_CPU_AVX_512DQ)) cpu_features += " avx-512dq";
+#endif
+#if CV_AVX_512ER
+    if (checkHardwareSupport(CV_CPU_AVX_512ER)) cpu_features += " avx-512er";
+#endif
+#if CV_AVX_512IFMA512
+    if (checkHardwareSupport(CV_CPU_AVX_512IFMA512)) cpu_features += " avx-512ifma512";
+#endif
+#if CV_AVX_512PF
+    if (checkHardwareSupport(CV_CPU_AVX_512PF)) cpu_features += " avx-512pf";
+#endif
+#if CV_AVX_512VBMI
+    if (checkHardwareSupport(CV_CPU_AVX_512VBMI)) cpu_features += " avx-512vbmi";
+#endif
+#if CV_AVX_512VL
+    if (checkHardwareSupport(CV_CPU_AVX_512VL)) cpu_features += " avx-512vl";
+#endif
 #if CV_NEON
     cpu_features += " neon"; // NEON is currently not checked at runtime
 #endif