fixes

parent 25e99c453f
commit f2cd65cf1e
@@ -224,7 +224,7 @@ if(MSVC)
     set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi")
   endif()

-  if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1600)
+  if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1800)
     set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2")
   endif()
   if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
@@ -309,7 +309,7 @@ if(MSVC)
   string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")

   if(NOT ENABLE_NOISY_WARNINGS AND MSVC_VERSION EQUAL 1400)
-    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4510 /wd4610 /wd4312 /wd4201 /wd4244 /wd4328 /wd4267 /wd4324)
+    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4510 /wd4610 /wd4312 /wd4201 /wd4244 /wd4328 /wd4267)
   endif()

   # allow extern "C" functions throw exceptions
@@ -321,6 +321,7 @@ if(MSVC)
   endforeach()

   if(NOT ENABLE_NOISY_WARNINGS)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251") #class 'std::XXX' needs to have dll-interface to be used by clients of YYY
+    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4251) # class 'std::XXX' needs to have dll-interface to be used by clients of YYY
+    ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4324) # 'struct_name' : structure was padded due to __declspec(align())
   endif()
 endif()
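
Note on the warning hunks above: /wd4324 moves out of the VS2005-only block (MSVC_VERSION EQUAL 1400) into the general block, and /wd4251 now goes through the ocv_warnings_disable helper instead of being appended to CMAKE_CXX_FLAGS by hand. For reference, a minimal MSVC translation unit that trips both warnings under the /W4 level enabled above (hypothetical types, not from the commit):

    #include <vector>

    // C4324: 'Padded': structure was padded due to __declspec(align())
    struct __declspec(align(32)) Padded { char c; };

    // C4251: 'Widget::data': class 'std::vector<int,...>' needs to have
    // dll-interface to be used by clients of class 'Widget'
    class __declspec(dllexport) Widget
    {
        std::vector<int> data;
    };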
@@ -159,12 +159,14 @@
 #    define CV_SSE4_2 1
 #  endif
 #  if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
-#    ifndef _MSC_VER
+#    ifdef _MSC_VER
+#      include <nmmintrin.h>
+#    else
 #      include <popcntintrin.h>
 #    endif
 #    define CV_POPCNT 1
 #  endif
-#  if defined __AVX__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
+#  if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600)
 // MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
 // See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
 #    include <immintrin.h>
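
The POPCNT hunk fixes an inverted guard: with the old `#ifndef _MSC_VER`, MSVC never saw any popcount header, so `_mm_popcnt_u32` was undeclared even though the `_MSC_VER >= 1500` branch turned CV_POPCNT on. A minimal check of the corrected include logic (a sketch; build with -msse4.2/-mpopcnt on GCC/Clang):

    #ifdef _MSC_VER
    #  include <nmmintrin.h>     // MSVC: the SSE4.2 header declares _mm_popcnt_u32
    #else
    #  include <popcntintrin.h>  // GCC/Clang equivalent
    #endif
    #include <cstdio>

    int main()
    {
        std::printf("%d\n", _mm_popcnt_u32(0xF0Fu)); // prints 8
        return 0;
    }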
@@ -175,7 +177,7 @@
 #      define __xgetbv() 0
 #    endif
 #  endif
-#  if defined __AVX2__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
+#  if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800)
 #    include <immintrin.h>
 #    define CV_AVX2 1
 #    if defined __FMA__
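
Both AVX hunks replace the `_MSC_FULL_VER >= 160040219` test (VS2010 SP1, the first compiler to accept /arch:AVX) with plain version checks: `_MSC_VER >= 1600` for AVX and `_MSC_VER >= 1800` for AVX2, matching the CMake change above, since /arch:AVX2 is a VS2013-era addition. These are compile-time capability checks only; whether the CPU actually supports AVX2 is decided at run time by the `__xgetbv` machinery plus a CPUID query. A simplified MSVC-flavoured sketch of that runtime side (a complete check also verifies OSXSAVE/XGETBV state, as the header does):

    #include <intrin.h>
    #include <cstdio>

    static bool cpu_has_avx2()
    {
        int info[4];
        __cpuidex(info, 7, 0);              // leaf 7, subleaf 0
        return (info[1] & (1 << 5)) != 0;   // EBX bit 5 = AVX2
    }

    int main()
    {
        std::printf("AVX2 on this CPU: %s\n", cpu_has_avx2() ? "yes" : "no");
        return 0;
    }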
@@ -43,7 +43,7 @@
 #define __OPENCV_CORE_SSE_UTILS_HPP__

 #ifndef __cplusplus
-#  error base.hpp header must be compiled as C++
+#  error sse_utils.hpp header must be compiled as C++
 #endif

 #if CV_SSE2
@@ -117,7 +117,7 @@ inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0

 inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
                                   __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
 {
     __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_b0);
     __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_b0);
     __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b1);
@@ -165,9 +165,9 @@ inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0
 }

 inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
 {
     __m128i v_mask = _mm_set1_epi16(0x00ff);

     __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
     __m128i layer4_chunk2 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
     __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
@@ -177,28 +177,28 @@ inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
     __m128i layer3_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
     __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
     __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));

     __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
     __m128i layer2_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
     __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
     __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));

     __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
     __m128i layer1_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
     __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
     __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));

     v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
     v_g0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
     v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
     v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
 }

 inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
                                 __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
 {
     __m128i v_mask = _mm_set1_epi16(0x00ff);

     __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
     __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
     __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
@@ -237,7 +237,7 @@ inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,

 inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
                                 __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
 {
     __m128i v_mask = _mm_set1_epi16(0x00ff);

     __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
@@ -286,8 +286,8 @@ inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
     v_a1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk6, 8), _mm_srli_epi16(layer1_chunk7, 8));
 }

 inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
 {
     __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g0);
     __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g0);
     __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_g1);
@@ -310,8 +310,8 @@ inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g
 }

 inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
                                    __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
 {
     __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g1);
     __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g1);
     __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b0);
@@ -342,7 +342,7 @@ inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g
 }

 inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
                                    __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
 {
     __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_b0);
     __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_b0);
@@ -352,7 +352,7 @@ inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g
     __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_a0);
     __m128i layer1_chunk6 = _mm_unpacklo_epi16(v_g1, v_a1);
     __m128i layer1_chunk7 = _mm_unpackhi_epi16(v_g1, v_a1);

     __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk4);
     __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk4);
     __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk5);
@@ -393,14 +393,14 @@ inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
     __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));

     __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
     __m128i layer2_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
     __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
     __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));

     __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
     __m128i layer1_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
     __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
     __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));

     v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
     v_g0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
@@ -421,18 +421,18 @@ inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
     __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));

     __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
     __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
     __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
     __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
     __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
     __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));

     __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
     __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
     __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
     __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
     __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
     __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));

     v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
     v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
@@ -457,26 +457,26 @@ inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
     __m128i layer3_chunk7 = _mm_packus_epi32(_mm_srli_epi32(v_a0, 16), _mm_srli_epi32(v_a1, 16));

     __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
     __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
     __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
     __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
     __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
     __m128i layer2_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
     __m128i layer2_chunk3 = _mm_packus_epi32(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
     __m128i layer2_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk6, 16), _mm_srli_epi32(layer3_chunk7, 16));

     __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
     __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
     __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
     __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
     __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
     __m128i layer1_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
     __m128i layer1_chunk3 = _mm_packus_epi32(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
     __m128i layer1_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk6, 16), _mm_srli_epi32(layer2_chunk7, 16));

     v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
     v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
-    v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
     v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
     v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
     v_a0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
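
The one functional change in this header is in the hunk above: the eight-register `_mm_interleave_epi16` assigned the packed result for `v_r1` into `v_r0`, clobbering the value computed two lines earlier and leaving `v_r1` untouched. A dump harness along these lines (hypothetical, assuming an SSE4.1-enabled OpenCV build with sse_utils.hpp on the include path) makes a clobbered output register visible at a glance, since every lane carries a (register, lane) tag:

    #include <opencv2/core/sse_utils.hpp>
    #include <emmintrin.h>
    #include <cstdio>

    int main()
    {
        unsigned short buf[8][8];
        for (int r = 0; r < 8; ++r)          // tag each lane with
            for (int i = 0; i < 8; ++i)      // 100*register + lane
                buf[r][i] = (unsigned short)(r * 100 + i);

        __m128i v[8];
        for (int r = 0; r < 8; ++r)
            v[r] = _mm_loadu_si128((const __m128i*)buf[r]);

        // r0, r1, g0, g1, b0, b1, a0, a1
        _mm_interleave_epi16(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);

        for (int r = 0; r < 8; ++r)
        {
            _mm_storeu_si128((__m128i*)buf[r], v[r]);
            for (int i = 0; i < 8; ++i)
                std::printf("%4u ", buf[r][i]);
            std::printf("\n");
        }
        return 0;
    }

With the old code, the second output row would still show the untouched input tags 100..107, betraying that `v_r1` was never written.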
@@ -487,12 +487,12 @@ inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
 #endif // CV_SSE4_1

 inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
 {
     __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g0);
     __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g0);
     __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_g1);
     __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_g1);

     __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk2);
     __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk2);
     __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk3);
@@ -506,14 +506,14 @@ inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m

 inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
                                 __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
 {
     __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g1);
     __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g1);
     __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b0);
     __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b0);
     __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_b1);
     __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_b1);

     __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk3);
     __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk3);
     __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk4);
@@ -531,7 +531,7 @@ inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,

 inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
                                 __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
 {
     __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_b0);
     __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_b0);
     __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b1);
@@ -3476,7 +3476,7 @@ struct Cmp_SIMD<schar>

         haveSSE = checkHardwareSupport(CV_CPU_SSE2);

-        v_mask = _mm_set1_epi8(0xff);
+        v_mask = _mm_set1_epi8(-1);
     }

     int operator () (const schar * src1, const schar * src2, uchar * dst, int width) const
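
`_mm_set1_epi8` takes a (signed) `char`, so the literal `0xff`, an `int` with value 255, gets truncated on the way in and draws a truncation warning from MSVC under the /W4 level enabled earlier. `-1` produces the identical all-ones mask without the noise. A quick sanity check (sketch):

    #include <emmintrin.h>
    #include <cstring>
    #include <cassert>

    int main()
    {
        // 0xff and -1 yield the same all-ones byte pattern; -1 simply
        // fits the signed char parameter without narrowing.
        __m128i a = _mm_set1_epi8((char)0xff);
        __m128i b = _mm_set1_epi8(-1);
        assert(std::memcmp(&a, &b, sizeof(a)) == 0);
        return 0;
    }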
@@ -616,18 +616,17 @@ struct VMerge4<data_type>
     bool support; \
 }

-MERGE2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
+MERGE2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128);
 MERGE3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128);
 MERGE4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128);

-MERGE2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
-
 #if CV_SSE4_1
+MERGE2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128);
 MERGE3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128);
 MERGE4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128);
 #endif

-MERGE2_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);
+MERGE2_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps);
 MERGE3_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps);
 MERGE4_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps);
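
Two distinct fixes in this hunk: the 2-channel merge kernels were instantiated with the *deinterleave* helpers (which belong to split) instead of the *interleave* ones, and the ushort 2-channel kernel moves inside the `#if CV_SSE4_1` block because `_mm_interleave_epi16` is built on `_mm_packus_epi32`, an SSE4.1 instruction. A standalone illustration of that dependency (sketch; compile with -msse4.1):

    #include <smmintrin.h>  // SSE4.1: _mm_packus_epi32
    #include <cstdio>

    int main()
    {
        // The unsigned-saturating 32->16 pack that _mm_interleave_epi16
        // relies on, and the reason the ushort kernels need CV_SSE4_1.
        __m128i lo = _mm_setr_epi32(1, 2, 3, 4);
        __m128i hi = _mm_setr_epi32(5, 6, 70000, 8);   // 70000 saturates to 65535
        __m128i p  = _mm_packus_epi32(lo, hi);         // 8 x u16
        unsigned short out[8];
        _mm_storeu_si128((__m128i*)out, p);
        for (int i = 0; i < 8; ++i)
            std::printf("%u ", (unsigned)out[i]);      // 1 2 3 4 5 6 65535 8
        std::printf("\n");
        return 0;
    }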
@@ -1460,9 +1460,9 @@ struct RGB2Gray<ushort>
         if( blueIdx == 0 )
             std::swap(coeffs[0], coeffs[2]);

-        v_cb = _mm_set1_epi16(coeffs[0]);
-        v_cg = _mm_set1_epi16(coeffs[1]);
-        v_cr = _mm_set1_epi16(coeffs[2]);
+        v_cb = _mm_set1_epi16((short)coeffs[0]);
+        v_cg = _mm_set1_epi16((short)coeffs[1]);
+        v_cr = _mm_set1_epi16((short)coeffs[2]);
         v_delta = _mm_set1_epi32(1 << (yuv_shift - 1));
     }

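
Here `coeffs` is an array of `int` while `_mm_set1_epi16` takes a `short`; with /W4, the implicit narrowing raises C4244 on MSVC, so the casts make the intended truncation explicit (the fixed-point gray coefficients fit in 16 bits by construction). The pattern in isolation (sketch):

    #include <emmintrin.h>

    __m128i broadcast_coeff(int c)
    {
        // MSVC /W4 flags an implicit int->short conversion here (C4244);
        // the explicit cast documents that the value is known to fit.
        return _mm_set1_epi16((short)c);
    }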
@@ -64,6 +64,7 @@ TEST(Photo_SeamlessClone_normal, regression)
     string original_path1 = folder + "source1.png";
     string original_path2 = folder + "destination1.png";
     string original_path3 = folder + "mask.png";
+    string reference_path = folder + "reference.png";

     Mat source = imread(original_path1, IMREAD_COLOR);
     Mat destination = imread(original_path2, IMREAD_COLOR);
@@ -79,8 +80,8 @@ TEST(Photo_SeamlessClone_normal, regression)
     p.y = destination.size().height/2;
     seamlessClone(source, destination, mask, p, result, 1);

-    Mat reference = imread(folder + "reference.png");
+    Mat reference = imread(reference_path);
+    ASSERT_FALSE(reference.empty()) << "Could not load reference image " << reference_path;

     SAVE(result);

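
The remaining hunks apply the same treatment to the other cloning regression tests: build `reference_path` once next to the other paths, then fail fast with a readable message when the reference image is missing, instead of comparing against an empty Mat and failing later with an unhelpful error. Condensed, the per-test pattern is:

    string reference_path = folder + "reference.png";
    // ... run the operation under test, SAVE(result) ...
    Mat reference = imread(reference_path);
    ASSERT_FALSE(reference.empty()) << "Could not load reference image " << reference_path;

    double error = cvtest::norm(reference, result, NORM_L1);
    EXPECT_LE(error, numerical_precision);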
@@ -94,6 +95,7 @@ TEST(Photo_SeamlessClone_mixed, regression)
     string original_path1 = folder + "source1.png";
     string original_path2 = folder + "destination1.png";
     string original_path3 = folder + "mask.png";
+    string reference_path = folder + "reference.png";

     Mat source = imread(original_path1, IMREAD_COLOR);
     Mat destination = imread(original_path2, IMREAD_COLOR);
@@ -111,7 +113,9 @@ TEST(Photo_SeamlessClone_mixed, regression)

     SAVE(result);

-    Mat reference = imread(folder + "reference.png");
+    Mat reference = imread(reference_path);
+    ASSERT_FALSE(reference.empty()) << "Could not load reference image " << reference_path;

     double error = cvtest::norm(reference, result, NORM_L1);
     EXPECT_LE(error, numerical_precision);

@@ -123,6 +127,7 @@ TEST(Photo_SeamlessClone_featureExchange, regression)
     string original_path1 = folder + "source1.png";
     string original_path2 = folder + "destination1.png";
     string original_path3 = folder + "mask.png";
+    string reference_path = folder + "reference.png";

     Mat source = imread(original_path1, IMREAD_COLOR);
     Mat destination = imread(original_path2, IMREAD_COLOR);
@@ -140,7 +145,9 @@ TEST(Photo_SeamlessClone_featureExchange, regression)

     SAVE(result);

-    Mat reference = imread(folder + "reference.png");
+    Mat reference = imread(reference_path);
+    ASSERT_FALSE(reference.empty()) << "Could not load reference image " << reference_path;

     double error = cvtest::norm(reference, result, NORM_L1);
     EXPECT_LE(error, numerical_precision);

@@ -151,6 +158,7 @@ TEST(Photo_SeamlessClone_colorChange, regression)
     string folder = string(cvtest::TS::ptr()->get_data_path()) + "cloning/color_change/";
     string original_path1 = folder + "source1.png";
     string original_path2 = folder + "mask.png";
+    string reference_path = folder + "reference.png";

     Mat source = imread(original_path1, IMREAD_COLOR);
     Mat mask = imread(original_path2, IMREAD_COLOR);
@@ -163,7 +171,9 @@ TEST(Photo_SeamlessClone_colorChange, regression)

     SAVE(result);

-    Mat reference = imread(folder + "reference.png");
+    Mat reference = imread(reference_path);
+    ASSERT_FALSE(reference.empty()) << "Could not load reference image " << reference_path;

     double error = cvtest::norm(reference, result, NORM_L1);
     EXPECT_LE(error, numerical_precision);

@@ -174,6 +184,7 @@ TEST(Photo_SeamlessClone_illuminationChange, regression)
     string folder = string(cvtest::TS::ptr()->get_data_path()) + "cloning/Illumination_Change/";
     string original_path1 = folder + "source1.png";
     string original_path2 = folder + "mask.png";
+    string reference_path = folder + "reference.png";

     Mat source = imread(original_path1, IMREAD_COLOR);
     Mat mask = imread(original_path2, IMREAD_COLOR);
@@ -186,7 +197,7 @@ TEST(Photo_SeamlessClone_illuminationChange, regression)

     SAVE(result);

-    Mat reference = imread(folder + "reference.png");
+    Mat reference = imread(reference_path);
     double error = cvtest::norm(reference, result, NORM_L1);
     EXPECT_LE(error, numerical_precision);

@@ -197,6 +208,7 @@ TEST(Photo_SeamlessClone_textureFlattening, regression)
     string folder = string(cvtest::TS::ptr()->get_data_path()) + "cloning/Texture_Flattening/";
     string original_path1 = folder + "source1.png";
     string original_path2 = folder + "mask.png";
+    string reference_path = folder + "reference.png";

     Mat source = imread(original_path1, IMREAD_COLOR);
     Mat mask = imread(original_path2, IMREAD_COLOR);
@@ -209,7 +221,9 @@ TEST(Photo_SeamlessClone_textureFlattening, regression)

     SAVE(result);

-    Mat reference = imread(folder + "reference.png");
+    Mat reference = imread(reference_path);
+    ASSERT_FALSE(reference.empty()) << "Could not load reference image " << reference_path;

     double error = cvtest::norm(reference, result, NORM_L1);
     EXPECT_LE(error, numerical_precision);
