From 35b21399298175e8e0476c88a7f0a6248bad4f8b Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 26 Mar 2015 15:25:42 +0300 Subject: [PATCH] Basic HAL module --- apps/annotation/CMakeLists.txt | 2 +- apps/createsamples/CMakeLists.txt | 2 +- apps/traincascade/CMakeLists.txt | 2 +- cmake/OpenCVModule.cmake | 17 ++ modules/core/include/opencv2/core/base.hpp | 19 +- modules/core/include/opencv2/core/cvdef.h | 233 +------------- modules/core/src/precomp.hpp | 2 + modules/core/src/stat.cpp | 182 +++-------- .../features2d/include/opencv2/features2d.hpp | 32 -- modules/hal/CMakeLists.txt | 12 + modules/hal/include/opencv2/hal.hpp | 74 +++++ modules/hal/include/opencv2/hal/defs.h | 284 ++++++++++++++++++ modules/hal/src/norm.cpp | 184 ++++++++++++ modules/hal/src/precomp.hpp | 2 + modules/java/CMakeLists.txt | 1 + samples/cpp/CMakeLists.txt | 2 +- samples/directx/CMakeLists.txt | 2 +- samples/gpu/CMakeLists.txt | 2 +- samples/tapi/CMakeLists.txt | 2 +- 19 files changed, 641 insertions(+), 415 deletions(-) create mode 100644 modules/hal/CMakeLists.txt create mode 100644 modules/hal/include/opencv2/hal.hpp create mode 100644 modules/hal/include/opencv2/hal/defs.h create mode 100644 modules/hal/src/norm.cpp create mode 100644 modules/hal/src/precomp.hpp diff --git a/apps/annotation/CMakeLists.txt b/apps/annotation/CMakeLists.txt index e14721ac6..57b133df1 100644 --- a/apps/annotation/CMakeLists.txt +++ b/apps/annotation/CMakeLists.txt @@ -9,7 +9,7 @@ project(annotation) set(the_target opencv_annotation) ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv") -ocv_target_include_modules(${the_target} ${OPENCV_ANNOTATION_DEPS}) +ocv_target_include_modules_recurse(${the_target} ${OPENCV_ANNOTATION_DEPS}) file(GLOB SRCS *.cpp) diff --git a/apps/createsamples/CMakeLists.txt b/apps/createsamples/CMakeLists.txt index 8acd288ac..24506231e 100644 --- a/apps/createsamples/CMakeLists.txt +++ b/apps/createsamples/CMakeLists.txt @@ -9,7 +9,7 @@ project(createsamples) set(the_target opencv_createsamples) ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv") -ocv_target_include_modules(${the_target} ${OPENCV_CREATESAMPLES_DEPS}) +ocv_target_include_modules_recurse(${the_target} ${OPENCV_CREATESAMPLES_DEPS}) file(GLOB SRCS *.cpp) file(GLOB HDRS *.h*) diff --git a/apps/traincascade/CMakeLists.txt b/apps/traincascade/CMakeLists.txt index 78101c0bc..b21fb8736 100644 --- a/apps/traincascade/CMakeLists.txt +++ b/apps/traincascade/CMakeLists.txt @@ -9,7 +9,7 @@ project(traincascade) set(the_target opencv_traincascade) ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv") -ocv_target_include_modules(${the_target} ${OPENCV_TRAINCASCADE_DEPS}) +ocv_target_include_modules_recurse(${the_target} ${OPENCV_TRAINCASCADE_DEPS}) file(GLOB SRCS *.cpp) file(GLOB HDRS *.h*) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 955dcc4af..d1558f406 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -176,6 +176,11 @@ macro(ocv_add_module _name) endif() endif() + # add HAL as dependency + if(NOT "${the_module}" STREQUAL "opencv_hal") + ocv_add_dependencies(${the_module} opencv_hal) + endif() + # add self to the world dependencies if((NOT DEFINED OPENCV_MODULE_IS_PART_OF_WORLD AND NOT OPENCV_MODULE_${the_module}_CLASS STREQUAL "BINDINGS" @@ -517,6 +522,18 @@ macro(ocv_include_modules) endforeach() endmacro() +# same as previous but with dependencies +macro(ocv_include_modules_recurse) + ocv_include_modules(${ARGN}) + foreach(d ${ARGN}) + if(d MATCHES "^opencv_" AND HAVE_${d} AND DEFINED OPENCV_MODULE_${d}_DEPS) + foreach (sub ${OPENCV_MODULE_${d}_DEPS}) + ocv_include_modules(${sub}) + endforeach() + endif() + endforeach() +endmacro() + # setup include paths for the list of passed modules macro(ocv_target_include_modules target) foreach(d ${ARGN}) diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index 73beb911f..08a6642fb 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -503,8 +503,6 @@ CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n); -CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n); -CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize); CV_EXPORTS float normL1_(const float* a, const float* b, int n); CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n); @@ -532,6 +530,23 @@ in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees. */ CV_EXPORTS_W float fastAtan2(float y, float x); +/* + * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor + * bit count of A exclusive XOR'ed with B + */ +struct CV_EXPORTS Hamming +{ + enum { normType = NORM_HAMMING }; + typedef unsigned char ValueType; + typedef int ResultType; + + /** this will count the bits in a ^ b + */ + ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const; +}; + +typedef Hamming HammingLUT; + /////////////////////////////////// inline norms //////////////////////////////////// diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 3498b0918..af0a271a4 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -56,23 +56,7 @@ #undef abs #undef Complex -#if defined __ICL -# define CV_ICC __ICL -#elif defined __ICC -# define CV_ICC __ICC -#elif defined __ECL -# define CV_ICC __ECL -#elif defined __ECC -# define CV_ICC __ECC -#elif defined __INTEL_COMPILER -# define CV_ICC __INTEL_COMPILER -#endif - -#if defined CV_ICC && !defined CV_ENABLE_UNROLLED -# define CV_ENABLE_UNROLLED 0 -#else -# define CV_ENABLE_UNROLLED 1 -#endif +#include "opencv2/hal/defs.h" #ifdef __OPENCV_BUILD # define DISABLE_OPENCV_24_COMPATIBILITY @@ -104,216 +88,6 @@ # endif #endif -/* CPU features and intrinsics support */ -#define CV_CPU_NONE 0 -#define CV_CPU_MMX 1 -#define CV_CPU_SSE 2 -#define CV_CPU_SSE2 3 -#define CV_CPU_SSE3 4 -#define CV_CPU_SSSE3 5 -#define CV_CPU_SSE4_1 6 -#define CV_CPU_SSE4_2 7 -#define CV_CPU_POPCNT 8 - -#define CV_CPU_AVX 10 -#define CV_CPU_AVX2 11 -#define CV_CPU_FMA3 12 - -#define CV_CPU_AVX_512F 13 -#define CV_CPU_AVX_512BW 14 -#define CV_CPU_AVX_512CD 15 -#define CV_CPU_AVX_512DQ 16 -#define CV_CPU_AVX_512ER 17 -#define CV_CPU_AVX_512IFMA512 18 -#define CV_CPU_AVX_512PF 19 -#define CV_CPU_AVX_512VBMI 20 -#define CV_CPU_AVX_512VL 21 - -#define CV_CPU_NEON 100 - -// when adding to this list remember to update the enum in core/utility.cpp -#define CV_HARDWARE_MAX_FEATURE 255 - -// do not include SSE/AVX/NEON headers for NVCC compiler -#ifndef __CUDACC__ - -#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) -# include -# define CV_MMX 1 -# define CV_SSE 1 -# define CV_SSE2 1 -# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE3 1 -# endif -# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSSE3 1 -# endif -# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_1 1 -# endif -# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_2 1 -# endif -# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500) -# ifdef _MSC_VER -# include -# else -# include -# endif -# define CV_POPCNT 1 -# endif -# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0) -// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX -// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 -# include -# define CV_AVX 1 -# if defined(_XCR_XFEATURE_ENABLED_MASK) -# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) -# else -# define __xgetbv() 0 -# endif -# endif -# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0) -# include -# define CV_AVX2 1 -# if defined __FMA__ -# define CV_FMA3 1 -# endif -# endif -#endif - -#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) -# include -# include "arm_neon.h" -# define CV_NEON 1 -# define CPU_HAS_NEON_FEATURE (true) -#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) -# include -# define CV_NEON 1 -#endif - -#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__) -# define CV_VFP 1 -#endif - -#endif // __CUDACC__ - -#ifndef CV_POPCNT -#define CV_POPCNT 0 -#endif -#ifndef CV_MMX -# define CV_MMX 0 -#endif -#ifndef CV_SSE -# define CV_SSE 0 -#endif -#ifndef CV_SSE2 -# define CV_SSE2 0 -#endif -#ifndef CV_SSE3 -# define CV_SSE3 0 -#endif -#ifndef CV_SSSE3 -# define CV_SSSE3 0 -#endif -#ifndef CV_SSE4_1 -# define CV_SSE4_1 0 -#endif -#ifndef CV_SSE4_2 -# define CV_SSE4_2 0 -#endif -#ifndef CV_AVX -# define CV_AVX 0 -#endif -#ifndef CV_AVX2 -# define CV_AVX2 0 -#endif -#ifndef CV_FMA3 -# define CV_FMA3 0 -#endif -#ifndef CV_AVX_512F -# define CV_AVX_512F 0 -#endif -#ifndef CV_AVX_512BW -# define CV_AVX_512BW 0 -#endif -#ifndef CV_AVX_512CD -# define CV_AVX_512CD 0 -#endif -#ifndef CV_AVX_512DQ -# define CV_AVX_512DQ 0 -#endif -#ifndef CV_AVX_512ER -# define CV_AVX_512ER 0 -#endif -#ifndef CV_AVX_512IFMA512 -# define CV_AVX_512IFMA512 0 -#endif -#ifndef CV_AVX_512PF -# define CV_AVX_512PF 0 -#endif -#ifndef CV_AVX_512VBMI -# define CV_AVX_512VBMI 0 -#endif -#ifndef CV_AVX_512VL -# define CV_AVX_512VL 0 -#endif - -#ifndef CV_NEON -# define CV_NEON 0 -#endif - -#ifndef CV_VFP -# define CV_VFP 0 -#endif - -/* primitive types */ -/* - schar - signed 1 byte integer - uchar - unsigned 1 byte integer - short - signed 2 byte integer - ushort - unsigned 2 byte integer - int - signed 4 byte integer - uint - unsigned 4 byte integer - int64 - signed 8 byte integer - uint64 - unsigned 8 byte integer -*/ - -#if !defined _MSC_VER && !defined __BORLANDC__ -# if defined __cplusplus && __cplusplus >= 201103L -# include - typedef std::uint32_t uint; -# else -# include - typedef uint32_t uint; -# endif -#else - typedef unsigned uint; -#endif - -typedef signed char schar; - -#ifndef __IPL_H__ - typedef unsigned char uchar; - typedef unsigned short ushort; -#endif - -#if defined _MSC_VER || defined __BORLANDC__ - typedef __int64 int64; - typedef unsigned __int64 uint64; -# define CV_BIG_INT(n) n##I64 -# define CV_BIG_UINT(n) n##UI64 -#else - typedef int64_t int64; - typedef uint64_t uint64; -# define CV_BIG_INT(n) n##LL -# define CV_BIG_UINT(n) n##ULL -#endif - /* special informative macros for wrapper generators */ #define CV_EXPORTS_W CV_EXPORTS #define CV_EXPORTS_W_SIMPLE CV_EXPORTS @@ -326,11 +100,6 @@ typedef signed char schar; #define CV_WRAP #define CV_WRAP_AS(synonym) -/* fundamental constants */ -#define CV_PI 3.1415926535897932384626433832795 -#define CV_2PI 6.283185307179586476925286766559 -#define CV_LOG2 0.69314718055994530941723212145818 - /****************************************************************************************\ * Matrix type (Mat) * \****************************************************************************************/ diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index fa6120371..e5007e5d1 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -55,6 +55,8 @@ #include "opencv2/core/private.cuda.hpp" #include "opencv2/core/ocl.hpp" +#include "opencv2/hal.hpp" + #include #include #include diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 41218904f..ca707e78a 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -2550,140 +2550,6 @@ int normL1_(const uchar* a, const uchar* b, int n) return d; } -static const uchar popCountTable[] = -{ - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - -static const uchar popCountTable2[] = -{ - 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, - 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, - 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4 -}; - -static const uchar popCountTable4[] = -{ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -}; - -static int normHamming(const uchar* a, int n) -{ - int i = 0, result = 0; -#if CV_NEON - { - uint32x4_t bits = vmovq_n_u32(0); - for (; i <= n - 16; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t bitsSet = vcntq_u8 (A_vec); - uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); - uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); - bits = vaddq_u32(bits, bitSet4); - } - uint64x2_t bitSet2 = vpaddlq_u32 (bits); - result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); - result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); - } -#endif - for( ; i <= n - 4; i += 4 ) - result += popCountTable[a[i]] + popCountTable[a[i+1]] + - popCountTable[a[i+2]] + popCountTable[a[i+3]]; - for( ; i < n; i++ ) - result += popCountTable[a[i]]; - return result; -} - -int normHamming(const uchar* a, const uchar* b, int n) -{ - int i = 0, result = 0; -#if CV_NEON - { - uint32x4_t bits = vmovq_n_u32(0); - for (; i <= n - 16; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t B_vec = vld1q_u8 (b + i); - uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); - uint8x16_t bitsSet = vcntq_u8 (AxorB); - uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); - uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); - bits = vaddq_u32(bits, bitSet4); - } - uint64x2_t bitSet2 = vpaddlq_u32 (bits); - result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); - result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); - } -#endif - for( ; i <= n - 4; i += 4 ) - result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + - popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; - for( ; i < n; i++ ) - result += popCountTable[a[i] ^ b[i]]; - return result; -} - -static int normHamming(const uchar* a, int n, int cellSize) -{ - if( cellSize == 1 ) - return normHamming(a, n); - const uchar* tab = 0; - if( cellSize == 2 ) - tab = popCountTable2; - else if( cellSize == 4 ) - tab = popCountTable4; - else - CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" ); - int i = 0, result = 0; -#if CV_ENABLE_UNROLLED - for( ; i <= n - 4; i += 4 ) - result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]]; -#endif - for( ; i < n; i++ ) - result += tab[a[i]]; - return result; -} - -int normHamming(const uchar* a, const uchar* b, int n, int cellSize) -{ - if( cellSize == 1 ) - return normHamming(a, b, n); - const uchar* tab = 0; - if( cellSize == 2 ) - tab = popCountTable2; - else if( cellSize == 4 ) - tab = popCountTable4; - else - CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" ); - int i = 0, result = 0; - #if CV_ENABLE_UNROLLED - for( ; i <= n - 4; i += 4 ) - result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] + - tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]]; - #endif - for( ; i < n; i++ ) - result += tab[a[i] ^ b[i]]; - return result; -} - - template int normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn) { @@ -2816,6 +2682,12 @@ normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int le return 0; } +Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const +{ + int result = 0; + cv::hal::normHamming(a, b, size, result); + return result; +} #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \ static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \ @@ -3164,10 +3036,18 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) const uchar* data = src.ptr(); if( normType == NORM_HAMMING ) - return normHamming(data, (int)len); + { + int result = 0; + cv::hal::normHamming(data, (int)len, result); + return result; + } if( normType == NORM_HAMMING2 ) - return normHamming(data, (int)len, 2); + { + int result = 0; + hal::normHamming(data, (int)len, 2, result); + return result; + } } } } @@ -3191,7 +3071,11 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) int result = 0; for( size_t i = 0; i < it.nplanes; i++, ++it ) - result += normHamming(ptrs[0], total, cellSize); + { + int one = 0; + cv::hal::normHamming(ptrs[0], total, cellSize, one); + result += one; + } return result; } @@ -3673,7 +3557,11 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m int result = 0; for( size_t i = 0; i < it.nplanes; i++, ++it ) - result += normHamming(ptrs[0], ptrs[1], total, cellSize); + { + int one = 0; + hal::normHamming(ptrs[0], ptrs[1], total, cellSize, one); + result += one; + } return result; } @@ -3810,13 +3698,18 @@ static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2, if( !mask ) { for( int i = 0; i < nvecs; i++ ) - dist[i] = normHamming(src1, src2 + step2*i, len); + hal::normHamming(src1, src2 + step2*i, len, dist[i]); } else { int val0 = INT_MAX; for( int i = 0; i < nvecs; i++ ) - dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0; + { + if (mask[i]) + hal::normHamming(src1, src2 + step2*i, len, dist[i]); + else + dist[i] = val0; + } } } @@ -3827,13 +3720,18 @@ static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2 if( !mask ) { for( int i = 0; i < nvecs; i++ ) - dist[i] = normHamming(src1, src2 + step2*i, len, 2); + hal::normHamming(src1, src2 + step2*i, len, 2, dist[i]); } else { int val0 = INT_MAX; for( int i = 0; i < nvecs; i++ ) - dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0; + { + if (mask[i]) + hal::normHamming(src1, src2 + step2*i, len, 2, dist[i]); + else + dist[i] = val0; + } } } diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index 3d7017228..4d27c4103 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -679,38 +679,6 @@ struct CV_EXPORTS L1 } }; -/* - * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor - * bit count of A exclusive XOR'ed with B - */ -struct CV_EXPORTS Hamming -{ - enum { normType = NORM_HAMMING }; - typedef unsigned char ValueType; - typedef int ResultType; - - /** this will count the bits in a ^ b - */ - ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const - { - return normHamming(a, b, size); - } -}; - -typedef Hamming HammingLUT; - -template struct HammingMultilevel -{ - enum { normType = NORM_HAMMING + (cellsize>1) }; - typedef unsigned char ValueType; - typedef int ResultType; - - ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const - { - return normHamming(a, b, size, cellsize); - } -}; - /****************************************************************************************\ * DescriptorMatcher * \****************************************************************************************/ diff --git a/modules/hal/CMakeLists.txt b/modules/hal/CMakeLists.txt new file mode 100644 index 000000000..b5b2abb81 --- /dev/null +++ b/modules/hal/CMakeLists.txt @@ -0,0 +1,12 @@ +set(the_description "The Hardware Acceleration Layer (HAL) module") + +set(OPENCV_MODULE_TYPE STATIC) +# set(OPENCV_MODULE_IS_PART_OF_WORLD FALSE) + +if(UNIX) + if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + endif() +endif() + +ocv_define_module(hal) diff --git a/modules/hal/include/opencv2/hal.hpp b/modules/hal/include/opencv2/hal.hpp new file mode 100644 index 000000000..d0e135080 --- /dev/null +++ b/modules/hal/include/opencv2/hal.hpp @@ -0,0 +1,74 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_HAL_HPP__ +#define __OPENCV_HAL_HPP__ + +#include "opencv2/hal/defs.h" + +/** + @defgroup hal Hardware Acceleration Layer +*/ + +namespace cv { namespace hal { + +namespace Error { + +enum Code +{ + Ok = 0, + Unknown = -1 +}; + +} + +Error::Code normHamming(const uchar* a, int n, int & result); +Error::Code normHamming(const uchar* a, const uchar* b, int n, int & result); + +Error::Code normHamming(const uchar* a, int n, int cellSize, int & result); +Error::Code normHamming(const uchar* a, const uchar* b, int n, int cellSize, int & result); + +}} //cv::hal + +#endif //__OPENCV_HAL_HPP__ diff --git a/modules/hal/include/opencv2/hal/defs.h b/modules/hal/include/opencv2/hal/defs.h new file mode 100644 index 000000000..96595961f --- /dev/null +++ b/modules/hal/include/opencv2/hal/defs.h @@ -0,0 +1,284 @@ +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_DEF_H__ +#define __OPENCV_DEF_H__ + +#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 +# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ +#endif + +#if defined __ICL +# define CV_ICC __ICL +#elif defined __ICC +# define CV_ICC __ICC +#elif defined __ECL +# define CV_ICC __ECL +#elif defined __ECC +# define CV_ICC __ECC +#elif defined __INTEL_COMPILER +# define CV_ICC __INTEL_COMPILER +#endif + +#if defined CV_ICC && !defined CV_ENABLE_UNROLLED +# define CV_ENABLE_UNROLLED 0 +#else +# define CV_ENABLE_UNROLLED 1 +#endif + +/* CPU features and intrinsics support */ +#define CV_CPU_NONE 0 +#define CV_CPU_MMX 1 +#define CV_CPU_SSE 2 +#define CV_CPU_SSE2 3 +#define CV_CPU_SSE3 4 +#define CV_CPU_SSSE3 5 +#define CV_CPU_SSE4_1 6 +#define CV_CPU_SSE4_2 7 +#define CV_CPU_POPCNT 8 + +#define CV_CPU_AVX 10 +#define CV_CPU_AVX2 11 +#define CV_CPU_FMA3 12 + +#define CV_CPU_AVX_512F 13 +#define CV_CPU_AVX_512BW 14 +#define CV_CPU_AVX_512CD 15 +#define CV_CPU_AVX_512DQ 16 +#define CV_CPU_AVX_512ER 17 +#define CV_CPU_AVX_512IFMA512 18 +#define CV_CPU_AVX_512PF 19 +#define CV_CPU_AVX_512VBMI 20 +#define CV_CPU_AVX_512VL 21 + +#define CV_CPU_NEON 100 + +// when adding to this list remember to update the enum in core/utility.cpp +#define CV_HARDWARE_MAX_FEATURE 255 + +// do not include SSE/AVX/NEON headers for NVCC compiler +#ifndef __CUDACC__ + +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) +# include +# define CV_MMX 1 +# define CV_SSE 1 +# define CV_SSE2 1 +# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) +# include +# define CV_SSE3 1 +# endif +# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) +# include +# define CV_SSSE3 1 +# endif +# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500) +# include +# define CV_SSE4_1 1 +# endif +# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) +# include +# define CV_SSE4_2 1 +# endif +# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500) +# ifdef _MSC_VER +# include +# else +# include +# endif +# define CV_POPCNT 1 +# endif +# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0) +// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX +// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 +# include +# define CV_AVX 1 +# if defined(_XCR_XFEATURE_ENABLED_MASK) +# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) +# else +# define __xgetbv() 0 +# endif +# endif +# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0) +# include +# define CV_AVX2 1 +# if defined __FMA__ +# define CV_FMA3 1 +# endif +# endif +#endif + +#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) +# include +# include "arm_neon.h" +# define CV_NEON 1 +# define CPU_HAS_NEON_FEATURE (true) +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +# include +# define CV_NEON 1 +#endif + +#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__) +# define CV_VFP 1 +#endif + +#endif // __CUDACC__ + +#ifndef CV_POPCNT +#define CV_POPCNT 0 +#endif +#ifndef CV_MMX +# define CV_MMX 0 +#endif +#ifndef CV_SSE +# define CV_SSE 0 +#endif +#ifndef CV_SSE2 +# define CV_SSE2 0 +#endif +#ifndef CV_SSE3 +# define CV_SSE3 0 +#endif +#ifndef CV_SSSE3 +# define CV_SSSE3 0 +#endif +#ifndef CV_SSE4_1 +# define CV_SSE4_1 0 +#endif +#ifndef CV_SSE4_2 +# define CV_SSE4_2 0 +#endif +#ifndef CV_AVX +# define CV_AVX 0 +#endif +#ifndef CV_AVX2 +# define CV_AVX2 0 +#endif +#ifndef CV_FMA3 +# define CV_FMA3 0 +#endif +#ifndef CV_AVX_512F +# define CV_AVX_512F 0 +#endif +#ifndef CV_AVX_512BW +# define CV_AVX_512BW 0 +#endif +#ifndef CV_AVX_512CD +# define CV_AVX_512CD 0 +#endif +#ifndef CV_AVX_512DQ +# define CV_AVX_512DQ 0 +#endif +#ifndef CV_AVX_512ER +# define CV_AVX_512ER 0 +#endif +#ifndef CV_AVX_512IFMA512 +# define CV_AVX_512IFMA512 0 +#endif +#ifndef CV_AVX_512PF +# define CV_AVX_512PF 0 +#endif +#ifndef CV_AVX_512VBMI +# define CV_AVX_512VBMI 0 +#endif +#ifndef CV_AVX_512VL +# define CV_AVX_512VL 0 +#endif + +#ifndef CV_NEON +# define CV_NEON 0 +#endif + +#ifndef CV_VFP +# define CV_VFP 0 +#endif + +/* primitive types */ +/* + schar - signed 1 byte integer + uchar - unsigned 1 byte integer + short - signed 2 byte integer + ushort - unsigned 2 byte integer + int - signed 4 byte integer + uint - unsigned 4 byte integer + int64 - signed 8 byte integer + uint64 - unsigned 8 byte integer +*/ + +#if !defined _MSC_VER && !defined __BORLANDC__ +# if defined __cplusplus && __cplusplus >= 201103L +# include + typedef std::uint32_t uint; +# else +# include + typedef uint32_t uint; +# endif +#else + typedef unsigned uint; +#endif + +typedef signed char schar; + +#ifndef __IPL_H__ + typedef unsigned char uchar; + typedef unsigned short ushort; +#endif + +#if defined _MSC_VER || defined __BORLANDC__ + typedef __int64 int64; + typedef unsigned __int64 uint64; +# define CV_BIG_INT(n) n##I64 +# define CV_BIG_UINT(n) n##UI64 +#else + typedef int64_t int64; + typedef uint64_t uint64; +# define CV_BIG_INT(n) n##LL +# define CV_BIG_UINT(n) n##ULL +#endif + +/* fundamental constants */ +#define CV_PI 3.1415926535897932384626433832795 +#define CV_2PI 6.283185307179586476925286766559 +#define CV_LOG2 0.69314718055994530941723212145818 + +#endif //__OPENCV_HAL_H__ diff --git a/modules/hal/src/norm.cpp b/modules/hal/src/norm.cpp new file mode 100644 index 000000000..bdcf9ed72 --- /dev/null +++ b/modules/hal/src/norm.cpp @@ -0,0 +1,184 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +namespace cv { namespace hal { + +static const uchar popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +static const uchar popCountTable2[] = +{ + 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, + 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, + 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4 +}; + +static const uchar popCountTable4[] = +{ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +Error::Code normHamming(const uchar* a, int n, int & result) +{ + int i = 0; + result = 0; +#if CV_NEON + { + uint32x4_t bits = vmovq_n_u32(0); + for (; i <= n - 16; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a + i); + uint8x16_t bitsSet = vcntq_u8 (A_vec); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); + } + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + } +#endif + for( ; i <= n - 4; i += 4 ) + result += popCountTable[a[i]] + popCountTable[a[i+1]] + + popCountTable[a[i+2]] + popCountTable[a[i+3]]; + for( ; i < n; i++ ) + result += popCountTable[a[i]]; + return Error::Ok; +} + +Error::Code normHamming(const uchar* a, const uchar* b, int n, int & result) +{ + int i = 0; + result = 0; +#if CV_NEON + { + uint32x4_t bits = vmovq_n_u32(0); + for (; i <= n - 16; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a + i); + uint8x16_t B_vec = vld1q_u8 (b + i); + uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); + uint8x16_t bitsSet = vcntq_u8 (AxorB); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); + } + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + } +#endif + for( ; i <= n - 4; i += 4 ) + result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + + popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; + for( ; i < n; i++ ) + result += popCountTable[a[i] ^ b[i]]; + return Error::Ok; +} + +Error::Code normHamming(const uchar* a, int n, int cellSize, int & result) +{ + if( cellSize == 1 ) + return normHamming(a, n, result); + const uchar* tab = 0; + if( cellSize == 2 ) + tab = popCountTable2; + else if( cellSize == 4 ) + tab = popCountTable4; + else + return Error::Unknown; + int i = 0; + result = 0; +#if CV_ENABLE_UNROLLED + for( ; i <= n - 4; i += 4 ) + result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]]; +#endif + for( ; i < n; i++ ) + result += tab[a[i]]; + return Error::Ok; +} + +Error::Code normHamming(const uchar* a, const uchar* b, int n, int cellSize, int & result) +{ + if( cellSize == 1 ) + return normHamming(a, b, n, result); + const uchar* tab = 0; + if( cellSize == 2 ) + tab = popCountTable2; + else if( cellSize == 4 ) + tab = popCountTable4; + else + return Error::Unknown; + int i = 0; + result = 0; + #if CV_ENABLE_UNROLLED + for( ; i <= n - 4; i += 4 ) + result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] + + tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]]; + #endif + for( ; i < n; i++ ) + result += tab[a[i] ^ b[i]]; + return Error::Ok; +} + +}} //cv::hal diff --git a/modules/hal/src/precomp.hpp b/modules/hal/src/precomp.hpp new file mode 100644 index 000000000..04ec37821 --- /dev/null +++ b/modules/hal/src/precomp.hpp @@ -0,0 +1,2 @@ + +#include "opencv2/hal.hpp" diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt index 38f410e64..3ba9c54dc 100644 --- a/modules/java/CMakeLists.txt +++ b/modules/java/CMakeLists.txt @@ -10,6 +10,7 @@ endif() set(the_description "The java bindings") ocv_add_module(java BINDINGS opencv_core opencv_imgproc) ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp") +ocv_module_include_directories("${OpenCV_SOURCE_DIR}/include") if(NOT ANDROID) include_directories(${JNI_INCLUDE_DIRS}) diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt index 4dfb65d21..ff5caa9fa 100644 --- a/samples/cpp/CMakeLists.txt +++ b/samples/cpp/CMakeLists.txt @@ -15,7 +15,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) project(cpp_samples) ocv_include_directories("${OpenCV_SOURCE_DIR}/include")#for opencv.hpp - ocv_include_modules(${OPENCV_CPP_SAMPLES_REQUIRED_DEPS}) + ocv_include_modules_recurse(${OPENCV_CPP_SAMPLES_REQUIRED_DEPS}) if(HAVE_opencv_cudaoptflow) ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/cudaoptflow/include") diff --git a/samples/directx/CMakeLists.txt b/samples/directx/CMakeLists.txt index 15a657537..21ada16ed 100644 --- a/samples/directx/CMakeLists.txt +++ b/samples/directx/CMakeLists.txt @@ -8,7 +8,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) project("${project}_samples") - ocv_include_modules(${OPENCV_DIRECTX_SAMPLES_REQUIRED_DEPS}) + ocv_include_modules_recurse(${OPENCV_DIRECTX_SAMPLES_REQUIRED_DEPS}) # --------------------------------------------- # Define executable targets diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt index 8741f1170..32c53ecf1 100644 --- a/samples/gpu/CMakeLists.txt +++ b/samples/gpu/CMakeLists.txt @@ -13,7 +13,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) project("${project}_samples") - ocv_include_modules(${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS}) + ocv_include_modules_recurse(${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS}) ocv_include_directories( "${OpenCV_SOURCE_DIR}/modules/gpu/src/nvidia" "${OpenCV_SOURCE_DIR}/modules/gpu/src/nvidia/core" diff --git a/samples/tapi/CMakeLists.txt b/samples/tapi/CMakeLists.txt index 9c69ab0a2..6f8d873ef 100644 --- a/samples/tapi/CMakeLists.txt +++ b/samples/tapi/CMakeLists.txt @@ -8,7 +8,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) project("${project}_samples") - ocv_include_modules(${OPENCV_TAPI_SAMPLES_REQUIRED_DEPS}) + ocv_include_modules_recurse(${OPENCV_TAPI_SAMPLES_REQUIRED_DEPS}) # --------------------------------------------- # Define executable targets