add feature to convert FP32(float) to FP16(half)

* check compiler support
* check HW support before executing
* add test doing round-trip conversion from/to FP32
* treat array correctly if size is not a multiple of 4
* add declaration to prevent warning
* make it possible to enable fp16 on 32-bit ARM
* make the conversion possible on unsupported HW, too
* add test using both HW and SW implementations
2016-05-21 21:31:33 +09:00
parent c3d1f94ee6
commit b2ad7cd9c0
8 changed files with 459 additions and 5 deletions
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -524,6 +524,17 @@ For example:
 CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst,
                                  double alpha = 1, double beta = 0);

+/** @brief Converts an array to or from half-precision floating-point numbers.
+
+convertFp16 converts FP32 to FP16 or FP16 to FP32. The input array must be of type CV_32F or
+CV_16S to represent the bit depth. If the input array is neither of them, the function does nothing.
+
+@param src input array.
+@param dst output array.
+@param useHW if true, use hardware SIMD instructions for the conversion when possible.
+*/
+CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst, bool useHW = true);
+
 /** @brief Performs a look-up table transform of an array.

 The function LUT fills the output array with values from the look-up table. Indices of the entries
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -112,7 +112,7 @@
 #define CV_CPU_SSE4_1           6
 #define CV_CPU_SSE4_2           7
 #define CV_CPU_POPCNT           8
-
+#define CV_CPU_FP16             9
 #define CV_CPU_AVX              10
 #define CV_CPU_AVX2             11
 #define CV_CPU_FMA3             12
@@ -143,7 +143,7 @@ enum CpuFeatures {
    CPU_SSE4_1          = 6,
    CPU_SSE4_2          = 7,
    CPU_POPCNT          = 8,
-
+    CPU_FP16            = 9,
    CPU_AVX             = 10,
    CPU_AVX2            = 11,
    CPU_FMA3            = 12,
@@ -193,6 +193,10 @@ enum CpuFeatures {
 #    endif
 #    define CV_POPCNT 1
 #  endif
+#  if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)
+#    include <immintrin.h>
+#    define CV_FP16 1
+#  endif
 #  if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
 // MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
 // See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
@@ -223,6 +227,10 @@ enum CpuFeatures {
 #  define CV_NEON 1
 #endif

+#if defined __GNUC__ && ((defined (__arm__) && (__ARM_FP & 0x2)) || defined(__aarch64__))
+#    define CV_FP16 1
+#endif
+
 #if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
 #  define CV_VFP 1
 #endif
@@ -253,6 +261,9 @@ enum CpuFeatures {
 #ifndef CV_SSE4_2
 #  define CV_SSE4_2 0
 #endif
+#ifndef CV_FP16
+#  define CV_FP16 0
+#endif
 #ifndef CV_AVX
 #  define CV_AVX 0
 #endif