From 6f68640d4d0fb524645c25b87a064fd2fbae81fb Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Tue, 26 Mar 2013 17:19:52 -0700
Subject: [PATCH] Multiple fixes for WinRT

Fixed flann build with NEON;
Fixed Hamming distance with NEON;
Round-half-to-even cvRound fallback for WinRT added;
cvRound test added;
Video IO with DirectShow disabled;
---
 CMakeLists.txt                                |  2 +-
 cmake/OpenCVFindLibsVideo.cmake               |  2 +-
 .../core/include/opencv2/core/internal.hpp    |  1 -
 modules/core/include/opencv2/core/types_c.h   |  7 +-
 modules/core/src/stat.cpp                     | 64 +++++++++----------
 modules/core/test/test_arithm.cpp             | 13 ++++
 modules/flann/include/opencv2/flann/dist.h    |  7 +-
 modules/highgui/CMakeLists.txt                | 15 ++---
 platforms/winrt/scripts/cmake_winrt.cmd       |  2 +-
 9 files changed, 59 insertions(+), 54 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 22ee7fe7b..9b7f8c2d7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,7 +138,7 @@ OCV_OPTION(WITH_CSTRIPES       "Include C= support"                          OFF
 OCV_OPTION(WITH_TIFF           "Include TIFF support"                        ON   IF (NOT IOS) )
 OCV_OPTION(WITH_UNICAP         "Include Unicap support (GPL)"                OFF  IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_V4L            "Include Video 4 Linux support"               ON   IF (UNIX AND NOT ANDROID) )
-OCV_OPTION(WITH_VIDEOINPUT     "Build HighGUI with DirectShow support"       ON   IF WIN32 )
+OCV_OPTION(WITH_VIDEOINPUT     "Build HighGUI with DirectShow support"       ON   IF WIN32 AND NOT ARM )
 OCV_OPTION(WITH_XIMEA          "Include XIMEA cameras support"               OFF  IF (NOT ANDROID AND NOT APPLE) )
 OCV_OPTION(WITH_XINE           "Include Xine support (GPL)"                  OFF  IF (UNIX AND NOT APPLE AND NOT ANDROID) )
 OCV_OPTION(WITH_OPENCL         "Include OpenCL Runtime support"              OFF  IF (NOT ANDROID AND NOT IOS) )
diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake
index 414918527..3556ba562 100644
--- a/cmake/OpenCVFindLibsVideo.cmake
+++ b/cmake/OpenCVFindLibsVideo.cmake
@@ -111,7 +111,7 @@ endif(WITH_XIMEA)
 # --- FFMPEG ---
 ocv_clear_vars(HAVE_FFMPEG HAVE_FFMPEG_CODEC HAVE_FFMPEG_FORMAT HAVE_FFMPEG_UTIL HAVE_FFMPEG_SWSCALE HAVE_GENTOO_FFMPEG HAVE_FFMPEG_FFMPEG)
 if(WITH_FFMPEG)
-  if(WIN32)
+  if(WIN32 AND NOT ARM)
     include("${OpenCV_SOURCE_DIR}/3rdparty/ffmpeg/ffmpeg_version.cmake")
   elseif(UNIX)
     CHECK_MODULE(libavcodec HAVE_FFMPEG_CODEC)
diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp
index 5335fa01f..8902e69de 100644
--- a/modules/core/include/opencv2/core/internal.hpp
+++ b/modules/core/include/opencv2/core/internal.hpp
@@ -136,7 +136,6 @@ CV_INLINE IppiSize ippiSize(int width, int height)
 #ifdef __ARM_NEON__
 #  include <arm_neon.h>
 #  define CV_NEON 1
-#  define CPU_HAS_NEON_FEATURE (true)
 #endif
 
 #ifndef CV_SSE
diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h
index 33e7fe993..be959a51c 100644
--- a/modules/core/include/opencv2/core/types_c.h
+++ b/modules/core/include/opencv2/core/types_c.h
@@ -323,7 +323,12 @@ CV_INLINE  int  cvRound( double value )
 #  endif
 #else
     // while this is not IEEE754-compliant rounding, it's usually a good enough approximation
-    return (int)(value + (value >= 0 ? 0.5 : -0.5));
+    double intpart, fractpart;
+    fractpart = modf(value, &intpart); // split value into integral and fractional parts
+    if ((fabs(fractpart) != 0.5) || ((((int)intpart) % 2) != 0)) // fabs, not abs: C's abs() takes an int and would truncate the fraction to 0
+        return (int)(value + (value >= 0 ? 0.5 : -0.5)); // non-tie, or tie next to an odd integer: round half away from zero
+    else
+        return (int)intpart; // exact .5 tie next to an even integer: keep the even integer
 #endif
 }
 
diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp
index b62f10a2a..e069e5298 100644
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -999,25 +999,22 @@ static int normHamming(const uchar* a, int n)
 {
     int i = 0, result = 0;
 #if CV_NEON
-    if (CPU_HAS_NEON_FEATURE)
-    {
-        uint32x4_t bits = vmovq_n_u32(0);
-        for (; i <= n - 16; i += 16) {
-            uint8x16_t A_vec = vld1q_u8 (a + i);
-            uint8x16_t bitsSet = vcntq_u8 (A_vec);
-            uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
-            uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
-            bits = vaddq_u32(bits, bitSet4);
-        }
-        uint64x2_t bitSet2 = vpaddlq_u32 (bits);
-        result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
-        result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
+    uint32x4_t bits = vmovq_n_u32(0);
+    for (; i <= n - 16; i += 16) {
+        uint8x16_t A_vec = vld1q_u8 (a + i);
+        uint8x16_t bitsSet = vcntq_u8 (A_vec);
+        uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
+        uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
+        bits = vaddq_u32(bits, bitSet4);
     }
-    else
-#endif
-        for( ; i <= n - 4; i += 4 )
+    uint64x2_t bitSet2 = vpaddlq_u32 (bits);
+    result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
+    result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
+#else
+    for( ; i <= n - 4; i += 4 )
             result += popCountTable[a[i]] + popCountTable[a[i+1]] +
             popCountTable[a[i+2]] + popCountTable[a[i+3]];
+#endif
     for( ; i < n; i++ )
         result += popCountTable[a[i]];
     return result;
@@ -1027,27 +1024,24 @@ int normHamming(const uchar* a, const uchar* b, int n)
 {
     int i = 0, result = 0;
 #if CV_NEON
-    if (CPU_HAS_NEON_FEATURE)
-    {
-        uint32x4_t bits = vmovq_n_u32(0);
-        for (; i <= n - 16; i += 16) {
-            uint8x16_t A_vec = vld1q_u8 (a + i);
-            uint8x16_t B_vec = vld1q_u8 (b + i);
-            uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
-            uint8x16_t bitsSet = vcntq_u8 (AxorB);
-            uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
-            uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
-            bits = vaddq_u32(bits, bitSet4);
-        }
-        uint64x2_t bitSet2 = vpaddlq_u32 (bits);
-        result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
-        result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
+    uint32x4_t bits = vmovq_n_u32(0);
+    for (; i <= n - 16; i += 16) {
+        uint8x16_t A_vec = vld1q_u8 (a + i);
+        uint8x16_t B_vec = vld1q_u8 (b + i);
+        uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
+        uint8x16_t bitsSet = vcntq_u8 (AxorB);
+        uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
+        uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
+        bits = vaddq_u32(bits, bitSet4);
     }
-    else
+    uint64x2_t bitSet2 = vpaddlq_u32 (bits);
+    result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
+    result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
+#else
+    for( ; i <= n - 4; i += 4 )
+        result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
+                popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
 #endif
-        for( ; i <= n - 4; i += 4 )
-            result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
-                    popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
     for( ; i < n; i++ )
         result += popCountTable[a[i] ^ b[i]];
     return result;
diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp
index ebc9eae64..a3e61f22a 100644
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -1551,3 +1551,16 @@ TEST(Core_Add, AddToColumnWhen4Rows)
 
     ASSERT_EQ(0, countNonZero(m1 - m2));
 }
+
+TEST(Core_round, CvRound) // checks cvRound on exact, non-tie and tie (x.5) inputs of both signs
+{
+    ASSERT_EQ(2, cvRound(2.0)); // exact integer is returned unchanged
+    ASSERT_EQ(2, cvRound(2.1)); // non-tie values go to the nearest integer
+    ASSERT_EQ(-2, cvRound(-2.1));
+    ASSERT_EQ(3, cvRound(2.8));
+    ASSERT_EQ(-3, cvRound(-2.8));
+    ASSERT_EQ(2, cvRound(2.5)); // tie: the even neighbour is expected
+    ASSERT_EQ(4, cvRound(3.5));
+    ASSERT_EQ(-2, cvRound(-2.5)); // ties on negative inputs are also expected to go to even
+    ASSERT_EQ(-4, cvRound(-3.5));
+}
\ No newline at end of file
diff --git a/modules/flann/include/opencv2/flann/dist.h b/modules/flann/include/opencv2/flann/dist.h
index d2674305c..7380d0c5d 100644
--- a/modules/flann/include/opencv2/flann/dist.h
+++ b/modules/flann/include/opencv2/flann/dist.h
@@ -456,7 +456,6 @@ struct Hamming
     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
     {
         ResultType result = 0;
-#ifdef __GNUC__
 #ifdef __ARM_NEON__
         {
             uint32x4_t bits = vmovq_n_u32(0);
@@ -473,7 +472,7 @@ struct Hamming
             result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
             result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
         }
-#else
+#elif __GNUC__
         {
             //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
             typedef unsigned long long pop_t;
@@ -493,8 +492,8 @@ struct Hamming
                 result += __builtin_popcountll(a_final ^ b_final);
             }
         }
-#endif //NEON
-#else
+#else // NO NEON and NOT GNUC
+        typedef unsigned long long pop_t;
         HammingLUT lut;
         result = lut(reinterpret_cast<const unsigned char*> (a),
                      reinterpret_cast<const unsigned char*> (b), size * sizeof(pop_t));
diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt
index 6d92455fa..59ec616d3 100644
--- a/modules/highgui/CMakeLists.txt
+++ b/modules/highgui/CMakeLists.txt
@@ -89,10 +89,8 @@ if(HAVE_QT)
   if(${_have_flag})
     set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations)
   endif()
-elseif(WIN32)
-  if (NOT ARM)
-    list(APPEND highgui_srcs src/window_w32.cpp)
-  endif()
+elseif(WIN32 AND NOT ARM)
+  list(APPEND highgui_srcs src/window_w32.cpp)
 elseif(HAVE_GTK)
   list(APPEND highgui_srcs src/window_gtk.cpp)
 elseif(APPLE)
@@ -107,12 +105,9 @@ elseif(APPLE)
   endif()
 endif()
 
-if(WIN32)
-  list(APPEND highgui_srcs src/cap_dshow.cpp)
-  if (NOT ARM)
-    list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp)
-  endif()
-endif(WIN32)
+if(WIN32 AND NOT ARM)
+  list(APPEND highgui_srcs src/cap_dshow.cpp src/cap_vfw.cpp src/cap_cmu.cpp)
+endif()
 
 if(HAVE_XINE)
   list(APPEND highgui_srcs src/cap_xine.cpp)
diff --git a/platforms/winrt/scripts/cmake_winrt.cmd b/platforms/winrt/scripts/cmake_winrt.cmd
index 3dd20e4d3..aafed7d09 100644
--- a/platforms/winrt/scripts/cmake_winrt.cmd
+++ b/platforms/winrt/scripts/cmake_winrt.cmd
@@ -3,4 +3,4 @@ cd build
 
 rem call "C:\Program Files\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat"
 
-cmake.exe -GNinja -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\..
+cmake.exe -GNinja -DCMAKE_BUILD_TYPE=Release -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\..