From 6f68640d4d0fb524645c25b87a064fd2fbae81fb Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 26 Mar 2013 17:19:52 -0700 Subject: [PATCH] Multiple fixes for WinRT Fixed flann build with NEON; Fixed Haming distance with NEON; Honest cvRound for WinRT added; cvRound test added; Video IO with direct show disabled; --- CMakeLists.txt | 2 +- cmake/OpenCVFindLibsVideo.cmake | 2 +- .../core/include/opencv2/core/internal.hpp | 1 - modules/core/include/opencv2/core/types_c.h | 7 +- modules/core/src/stat.cpp | 64 +++++++++---------- modules/core/test/test_arithm.cpp | 13 ++++ modules/flann/include/opencv2/flann/dist.h | 7 +- modules/highgui/CMakeLists.txt | 15 ++--- platforms/winrt/scripts/cmake_winrt.cmd | 2 +- 9 files changed, 59 insertions(+), 54 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 22ee7fe7b..9b7f8c2d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,7 +138,7 @@ OCV_OPTION(WITH_CSTRIPES "Include C= support" OFF OCV_OPTION(WITH_TIFF "Include TIFF support" ON IF (NOT IOS) ) OCV_OPTION(WITH_UNICAP "Include Unicap support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_V4L "Include Video 4 Linux support" ON IF (UNIX AND NOT ANDROID) ) -OCV_OPTION(WITH_VIDEOINPUT "Build HighGUI with DirectShow support" ON IF WIN32 ) +OCV_OPTION(WITH_VIDEOINPUT "Build HighGUI with DirectShow support" ON IF WIN32 AND NOT ARM ) OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF IF (NOT ANDROID AND NOT APPLE) ) OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) ) diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake index 414918527..3556ba562 100644 --- a/cmake/OpenCVFindLibsVideo.cmake +++ b/cmake/OpenCVFindLibsVideo.cmake @@ -111,7 +111,7 @@ endif(WITH_XIMEA) # --- FFMPEG --- ocv_clear_vars(HAVE_FFMPEG HAVE_FFMPEG_CODEC HAVE_FFMPEG_FORMAT HAVE_FFMPEG_UTIL HAVE_FFMPEG_SWSCALE HAVE_GENTOO_FFMPEG HAVE_FFMPEG_FFMPEG) if(WITH_FFMPEG) - if(WIN32) + if(WIN32 AND NOT ARM) include("${OpenCV_SOURCE_DIR}/3rdparty/ffmpeg/ffmpeg_version.cmake") elseif(UNIX) CHECK_MODULE(libavcodec HAVE_FFMPEG_CODEC) diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 5335fa01f..8902e69de 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -136,7 +136,6 @@ CV_INLINE IppiSize ippiSize(int width, int height) #ifdef __ARM_NEON__ # include # define CV_NEON 1 -# define CPU_HAS_NEON_FEATURE (true) #endif #ifndef CV_SSE diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h index 33e7fe993..be959a51c 100644 --- a/modules/core/include/opencv2/core/types_c.h +++ b/modules/core/include/opencv2/core/types_c.h @@ -323,7 +323,12 @@ CV_INLINE int cvRound( double value ) # endif #else // while this is not IEEE754-compliant rounding, it's usually a good enough approximation - return (int)(value + (value >= 0 ? 0.5 : -0.5)); + double intpart, fractpart; + fractpart = modf(value, &intpart); + if ((abs(fractpart) != 0.5) || ((((int)intpart) % 2) != 0)) + return (int)(value + (value >= 0 ? 0.5 : -0.5)); + else + return (int)intpart; #endif } diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index b62f10a2a..e069e5298 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -999,25 +999,22 @@ static int normHamming(const uchar* a, int n) { int i = 0, result = 0; #if CV_NEON - if (CPU_HAS_NEON_FEATURE) - { - uint32x4_t bits = vmovq_n_u32(0); - for (; i <= n - 16; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t bitsSet = vcntq_u8 (A_vec); - uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); - uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); - bits = vaddq_u32(bits, bitSet4); - } - uint64x2_t bitSet2 = vpaddlq_u32 (bits); - result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); - result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + uint32x4_t bits = vmovq_n_u32(0); + for (; i <= n - 16; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a + i); + uint8x16_t bitsSet = vcntq_u8 (A_vec); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); } - else -#endif - for( ; i <= n - 4; i += 4 ) + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); +#else + for( ; i <= n - 4; i += 4 ) result += popCountTable[a[i]] + popCountTable[a[i+1]] + popCountTable[a[i+2]] + popCountTable[a[i+3]]; +#endif for( ; i < n; i++ ) result += popCountTable[a[i]]; return result; @@ -1027,27 +1024,24 @@ int normHamming(const uchar* a, const uchar* b, int n) { int i = 0, result = 0; #if CV_NEON - if (CPU_HAS_NEON_FEATURE) - { - uint32x4_t bits = vmovq_n_u32(0); - for (; i <= n - 16; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t B_vec = vld1q_u8 (b + i); - uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); - uint8x16_t bitsSet = vcntq_u8 (AxorB); - uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); - uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); - bits = vaddq_u32(bits, bitSet4); - } - uint64x2_t bitSet2 = vpaddlq_u32 (bits); - result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); - result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + uint32x4_t bits = vmovq_n_u32(0); + for (; i <= n - 16; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a + i); + uint8x16_t B_vec = vld1q_u8 (b + i); + uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); + uint8x16_t bitsSet = vcntq_u8 (AxorB); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); } - else + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); +#else + for( ; i <= n - 4; i += 4 ) + result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + + popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; #endif - for( ; i <= n - 4; i += 4 ) - result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + - popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; for( ; i < n; i++ ) result += popCountTable[a[i] ^ b[i]]; return result; diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index ebc9eae64..a3e61f22a 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -1551,3 +1551,16 @@ TEST(Core_Add, AddToColumnWhen4Rows) ASSERT_EQ(0, countNonZero(m1 - m2)); } + +TEST(Core_round, CvRound) +{ + ASSERT_EQ(2, cvRound(2.0)); + ASSERT_EQ(2, cvRound(2.1)); + ASSERT_EQ(-2, cvRound(-2.1)); + ASSERT_EQ(3, cvRound(2.8)); + ASSERT_EQ(-3, cvRound(-2.8)); + ASSERT_EQ(2, cvRound(2.5)); + ASSERT_EQ(4, cvRound(3.5)); + ASSERT_EQ(-2, cvRound(-2.5)); + ASSERT_EQ(-4, cvRound(-3.5)); +} \ No newline at end of file diff --git a/modules/flann/include/opencv2/flann/dist.h b/modules/flann/include/opencv2/flann/dist.h index d2674305c..7380d0c5d 100644 --- a/modules/flann/include/opencv2/flann/dist.h +++ b/modules/flann/include/opencv2/flann/dist.h @@ -456,7 +456,6 @@ struct Hamming ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const { ResultType result = 0; -#ifdef __GNUC__ #ifdef __ARM_NEON__ { uint32x4_t bits = vmovq_n_u32(0); @@ -473,7 +472,7 @@ struct Hamming result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); } -#else +#elif __GNUC__ { //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll) typedef unsigned long long pop_t; @@ -493,8 +492,8 @@ struct Hamming result += __builtin_popcountll(a_final ^ b_final); } } -#endif //NEON -#else +#else // NO NEON and NOT GNUC + typedef unsigned long long pop_t; HammingLUT lut; result = lut(reinterpret_cast (a), reinterpret_cast (b), size * sizeof(pop_t)); diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 6d92455fa..59ec616d3 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -89,10 +89,8 @@ if(HAVE_QT) if(${_have_flag}) set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) endif() -elseif(WIN32) - if (NOT ARM) - list(APPEND highgui_srcs src/window_w32.cpp) - endif() +elseif(WIN32 AND NOT ARM) + list(APPEND highgui_srcs src/window_w32.cpp) elseif(HAVE_GTK) list(APPEND highgui_srcs src/window_gtk.cpp) elseif(APPLE) @@ -107,12 +105,9 @@ elseif(APPLE) endif() endif() -if(WIN32) - list(APPEND highgui_srcs src/cap_dshow.cpp) - if (NOT ARM) - list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp) - endif() -endif(WIN32) +if(WIN32 AND NOT ARM) + list(APPEND highgui_srcs src/cap_dshow.cpp src/cap_vfw.cpp src/cap_cmu.cpp) +endif() if(HAVE_XINE) list(APPEND highgui_srcs src/cap_xine.cpp) diff --git a/platforms/winrt/scripts/cmake_winrt.cmd b/platforms/winrt/scripts/cmake_winrt.cmd index 3dd20e4d3..aafed7d09 100644 --- a/platforms/winrt/scripts/cmake_winrt.cmd +++ b/platforms/winrt/scripts/cmake_winrt.cmd @@ -3,4 +3,4 @@ cd build rem call "C:\Program Files\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat" -cmake.exe -GNinja -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\.. +cmake.exe -GNinja -DCMAKE_BUILD_TYPE=Release -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\..