Multiple fixes for WinRT
Fixed flann build with NEON; Fixed Hamming distance with NEON; Honest cvRound for WinRT added; cvRound test added; Video IO with DirectShow disabled;
This commit is contained in:
@@ -136,7 +136,6 @@ CV_INLINE IppiSize ippiSize(int width, int height)
|
||||
#ifdef __ARM_NEON__
|
||||
# include <arm_neon.h>
|
||||
# define CV_NEON 1
|
||||
# define CPU_HAS_NEON_FEATURE (true)
|
||||
#endif
|
||||
|
||||
#ifndef CV_SSE
|
||||
|
@@ -323,7 +323,12 @@ CV_INLINE int cvRound( double value )
|
||||
# endif
|
||||
#else
|
||||
// while this is not IEEE754-compliant rounding, it's usually a good enough approximation
|
||||
return (int)(value + (value >= 0 ? 0.5 : -0.5));
|
||||
double intpart, fractpart;
|
||||
fractpart = modf(value, &intpart);
|
||||
if ((abs(fractpart) != 0.5) || ((((int)intpart) % 2) != 0))
|
||||
return (int)(value + (value >= 0 ? 0.5 : -0.5));
|
||||
else
|
||||
return (int)intpart;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@@ -999,25 +999,22 @@ static int normHamming(const uchar* a, int n)
|
||||
{
|
||||
int i = 0, result = 0;
|
||||
#if CV_NEON
|
||||
if (CPU_HAS_NEON_FEATURE)
|
||||
{
|
||||
uint32x4_t bits = vmovq_n_u32(0);
|
||||
for (; i <= n - 16; i += 16) {
|
||||
uint8x16_t A_vec = vld1q_u8 (a + i);
|
||||
uint8x16_t bitsSet = vcntq_u8 (A_vec);
|
||||
uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
|
||||
uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
|
||||
bits = vaddq_u32(bits, bitSet4);
|
||||
}
|
||||
uint64x2_t bitSet2 = vpaddlq_u32 (bits);
|
||||
result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
|
||||
result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
|
||||
uint32x4_t bits = vmovq_n_u32(0);
|
||||
for (; i <= n - 16; i += 16) {
|
||||
uint8x16_t A_vec = vld1q_u8 (a + i);
|
||||
uint8x16_t bitsSet = vcntq_u8 (A_vec);
|
||||
uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
|
||||
uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
|
||||
bits = vaddq_u32(bits, bitSet4);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
for( ; i <= n - 4; i += 4 )
|
||||
uint64x2_t bitSet2 = vpaddlq_u32 (bits);
|
||||
result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
|
||||
result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
|
||||
#else
|
||||
for( ; i <= n - 4; i += 4 )
|
||||
result += popCountTable[a[i]] + popCountTable[a[i+1]] +
|
||||
popCountTable[a[i+2]] + popCountTable[a[i+3]];
|
||||
#endif
|
||||
for( ; i < n; i++ )
|
||||
result += popCountTable[a[i]];
|
||||
return result;
|
||||
@@ -1027,27 +1024,24 @@ int normHamming(const uchar* a, const uchar* b, int n)
|
||||
{
|
||||
int i = 0, result = 0;
|
||||
#if CV_NEON
|
||||
if (CPU_HAS_NEON_FEATURE)
|
||||
{
|
||||
uint32x4_t bits = vmovq_n_u32(0);
|
||||
for (; i <= n - 16; i += 16) {
|
||||
uint8x16_t A_vec = vld1q_u8 (a + i);
|
||||
uint8x16_t B_vec = vld1q_u8 (b + i);
|
||||
uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
|
||||
uint8x16_t bitsSet = vcntq_u8 (AxorB);
|
||||
uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
|
||||
uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
|
||||
bits = vaddq_u32(bits, bitSet4);
|
||||
}
|
||||
uint64x2_t bitSet2 = vpaddlq_u32 (bits);
|
||||
result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
|
||||
result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
|
||||
uint32x4_t bits = vmovq_n_u32(0);
|
||||
for (; i <= n - 16; i += 16) {
|
||||
uint8x16_t A_vec = vld1q_u8 (a + i);
|
||||
uint8x16_t B_vec = vld1q_u8 (b + i);
|
||||
uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
|
||||
uint8x16_t bitsSet = vcntq_u8 (AxorB);
|
||||
uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
|
||||
uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
|
||||
bits = vaddq_u32(bits, bitSet4);
|
||||
}
|
||||
else
|
||||
uint64x2_t bitSet2 = vpaddlq_u32 (bits);
|
||||
result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
|
||||
result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
|
||||
#else
|
||||
for( ; i <= n - 4; i += 4 )
|
||||
result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
|
||||
popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
|
||||
#endif
|
||||
for( ; i <= n - 4; i += 4 )
|
||||
result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
|
||||
popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
|
||||
for( ; i < n; i++ )
|
||||
result += popCountTable[a[i] ^ b[i]];
|
||||
return result;
|
||||
|
@@ -1551,3 +1551,16 @@ TEST(Core_Add, AddToColumnWhen4Rows)
|
||||
|
||||
ASSERT_EQ(0, countNonZero(m1 - m2));
|
||||
}
|
||||
|
||||
// Checks cvRound on an exact integer, on values away from a tie, and on
// half-way values. The half-way expectations below (2.5 -> 2, 3.5 -> 4 and
// their negatives) require ties to be rounded to the nearest even integer.
TEST(Core_round, CvRound)
|
||||
{
|
||||
// Exact integer stays unchanged.
ASSERT_EQ(2, cvRound(2.0));
|
||||
// Fraction below one half: rounds toward the nearer integer.
ASSERT_EQ(2, cvRound(2.1));
|
||||
ASSERT_EQ(-2, cvRound(-2.1));
|
||||
// Fraction above one half: rounds away from zero to the nearer integer.
ASSERT_EQ(3, cvRound(2.8));
|
||||
ASSERT_EQ(-3, cvRound(-2.8));
|
||||
// Exact ties: the expected result is the even neighbour.
ASSERT_EQ(2, cvRound(2.5));
|
||||
ASSERT_EQ(4, cvRound(3.5));
|
||||
ASSERT_EQ(-2, cvRound(-2.5));
|
||||
ASSERT_EQ(-4, cvRound(-3.5));
|
||||
}
|
Reference in New Issue
Block a user