add feature to convert FP32(float) to FP16(half)

* check compiler support
  * check HW support before executing
  * add test doing round trip conversion from / to FP32
  * treat array correctly if size is not multiple of 4
  * add declaration to prevent warning
  * make it possible to enable fp16 on 32bit ARM
  * let the conversion possible on non-supported HW, too.
  * add test using both HW and SW implementation
This commit is contained in:
Tomoaki Teshima
2016-05-21 21:31:33 +09:00
parent c3d1f94ee6
commit b2ad7cd9c0
8 changed files with 459 additions and 5 deletions

View File

@@ -524,6 +524,17 @@ For example:
CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst,
double alpha = 1, double beta = 0);
/** @brief Converts an array to half precision floating number.
convertFp16 converts FP32 to FP16 or FP16 to FP32. The input array has to have type of CV_32F or
CV_16S to represent the bit depth. If the input array is neither of them, it'll do nothing.
@param src input array.
@param dst output array.
@param useHW if possible use HW SIMD instruction to convert
*/
CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst, bool useHW = true);
/** @brief Performs a look-up table transform of an array.
The function LUT fills the output array with values from the look-up table. Indices of the entries

View File

@@ -112,7 +112,7 @@
#define CV_CPU_SSE4_1 6
#define CV_CPU_SSE4_2 7
#define CV_CPU_POPCNT 8
#define CV_CPU_FP16 9
#define CV_CPU_AVX 10
#define CV_CPU_AVX2 11
#define CV_CPU_FMA3 12
@@ -143,7 +143,7 @@ enum CpuFeatures {
CPU_SSE4_1 = 6,
CPU_SSE4_2 = 7,
CPU_POPCNT = 8,
CPU_FP16 = 9,
CPU_AVX = 10,
CPU_AVX2 = 11,
CPU_FMA3 = 12,
@@ -193,6 +193,10 @@ enum CpuFeatures {
# endif
# define CV_POPCNT 1
# endif
# if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)
# include <immintrin.h>
# define CV_FP16 1
# endif
# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
@@ -223,6 +227,10 @@ enum CpuFeatures {
# define CV_NEON 1
#endif
#if defined __GNUC__ && ((defined (__arm__) && (__ARM_FP & 0x2)) || defined(__aarch64__))
# define CV_FP16 1
#endif
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
# define CV_VFP 1
#endif
@@ -253,6 +261,9 @@ enum CpuFeatures {
#ifndef CV_SSE4_2
# define CV_SSE4_2 0
#endif
#ifndef CV_FP16
# define CV_FP16 0
#endif
#ifndef CV_AVX
# define CV_AVX 0
#endif