diff --git a/cmake/OpenCVFindIPP.cmake b/cmake/OpenCVFindIPP.cmake
index 780ee51b8..559f70a96 100644
--- a/cmake/OpenCVFindIPP.cmake
+++ b/cmake/OpenCVFindIPP.cmake
@@ -156,6 +156,7 @@ macro(ipp_set_variables _LATEST_VERSION)
   set(IPPCC "cc") # color conversion
   set(IPPCV "cv") # computer vision
   set(IPPVM "vm") # vector math
+  set(IPPM  "m")  # matrix math

   list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPVM}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
   list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCC}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
@@ -163,6 +164,9 @@ macro(ipp_set_variables _LATEST_VERSION)
   list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPI}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
   list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPS}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
   list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCORE}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  if(NOT HAVE_IPP_ICV_ONLY)
+    list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPM}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  endif()

   # FIXIT
   # if(UNIX AND NOT HAVE_IPP_ICV_ONLY)
@@ -177,12 +181,16 @@ macro(ipp_set_variables _LATEST_VERSION)
       if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
         message(SEND_ERROR "Intel compiler EM64T libraries not found")
       endif()
-      set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
+      if(NOT APPLE)
+        set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
+      endif()
     else()
       if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
         message(SEND_ERROR "Intel compiler IA32 libraries not found")
       endif()
-      set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
+      if (NOT APPLE)
+        set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
+      endif()
     endif()
     list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}irc${CMAKE_SHARED_LIBRARY_SUFFIX})
     list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}imf${CMAKE_SHARED_LIBRARY_SUFFIX})
diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp
index 45351d1a8..f516e6478 100644
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@@ -223,6 +223,13 @@ static inline IppiSize ippiSize(int width, int height)
     IppiSize size = { width, height };
     return size;
 }
+
+static inline IppiSize ippiSize(const cv::Size & _size)
+{
+    IppiSize size = { _size.width, _size.height };
+    return size;
+}
+
 #else
 # define IPP_VERSION_X100 0
 #endif
diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index ecc2ca064..aa9469c04 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -460,7 +460,7 @@ static void add8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<uchar, OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -479,7 +479,7 @@ static void add16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<ushort, OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@@ -491,7 +491,7 @@ static void add16s( const short* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<short, OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, sz));
@@ -510,7 +510,7 @@ static void add32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp32<float, OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, sz));
@@ -529,7 +529,7 @@ static void sub8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<uchar, OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -548,7 +548,7 @@ static void sub16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<ushort, OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@@ -560,7 +560,7 @@ static void sub16s( const short* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
         return;
 #endif
     (vBinOp<short, OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, sz));
@@ -579,7 +579,7 @@ static void sub32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp32<float, OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, sz));
@@ -801,7 +801,7 @@ static void absdiff8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -820,7 +820,7 @@ static void absdiff16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<ushort, OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@@ -846,7 +846,7 @@ static void absdiff32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp32<float, OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, sz));
@@ -866,7 +866,7 @@ static void and8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -878,7 +878,7 @@ static void or8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -890,7 +890,7 @@ static void xor8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
     fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -901,8 +901,8 @@ static void not8u( const uchar* src1, size_t step1,
                    uchar* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-    fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2;
-    if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz))
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void)src2;
+    if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, ippiSize(sz)))
         return;
 #endif
     (vBinOp<uchar, OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@@ -2386,7 +2386,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
     if( op >= 0 )
     {
         fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
             return;
     }
 #endif
@@ -2469,7 +2469,7 @@ static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t
     if( op >= 0 )
     {
         fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
             return;
     }
 #endif
@@ -2484,7 +2484,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
     if( op > 0 )
     {
         fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
             return;
     }
 #endif
@@ -2590,7 +2590,7 @@ static void cmp32f(const float* src1, size_t step1, const float* src2, size_t st
     if( op >= 0 )
     {
         fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
             return;
     }
 #endif
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index cd5cf9b73..66ae7f90b 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -1079,6 +1079,33 @@ dtype* dst, size_t dstep, Size size, double* scale) \
     cvtScale_(src, sstep, dst, dstep, size, (wtype)scale[0], (wtype)scale[1]); \
 }

+#ifdef HAVE_IPP
+#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    if (ippiConvert_##ippFavor(src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \
+        return; \
+    cvt_(src, sstep, dst, dstep, size); \
+}
+
+#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    if (ippiConvert_##ippFavor(src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \
+        return; \
+    cvt_(src, sstep, dst, dstep, size); \
+}
+#else
+#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    cvt_(src, sstep, dst, dstep, size); \
+}
+#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
+#endif

 #define DEF_CVT_FUNC(suffix, stype, dtype) \
 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
@@ -1089,7 +1116,7 @@ static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \

 #define DEF_CPY_FUNC(suffix, stype) \
 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
-stype* dst, size_t dstep, Size size, double*) \
+                         stype* dst, size_t dstep, Size size, double*) \
 { \
     cpy_(src, sstep, dst, dstep, size); \
 }
@@ -1160,48 +1187,48 @@ DEF_CVT_SCALE_FUNC(32f64f, float, double, double)
 DEF_CVT_SCALE_FUNC(64f, double, double, double)

 DEF_CPY_FUNC(8u, uchar)
-DEF_CVT_FUNC(8s8u, schar, uchar)
-DEF_CVT_FUNC(16u8u, ushort, uchar)
-DEF_CVT_FUNC(16s8u, short, uchar)
-DEF_CVT_FUNC(32s8u, int, uchar)
-DEF_CVT_FUNC(32f8u, float, uchar)
+DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs)
+DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R)
+DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R)
+DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R)
+DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs)
 DEF_CVT_FUNC(64f8u, double, uchar)

-DEF_CVT_FUNC(8u8s, uchar, schar)
-DEF_CVT_FUNC(16u8s, ushort, schar)
-DEF_CVT_FUNC(16s8s, short, schar)
-DEF_CVT_FUNC(32s8s, int, schar)
-DEF_CVT_FUNC(32f8s, float, schar)
+DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs)
+DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs)
+DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs)
+DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R)
+DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs)
 DEF_CVT_FUNC(64f8s, double, schar)

-DEF_CVT_FUNC(8u16u, uchar, ushort)
-DEF_CVT_FUNC(8s16u, schar, ushort)
+DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R)
+DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs)
 DEF_CPY_FUNC(16u, ushort)
-DEF_CVT_FUNC(16s16u, short, ushort)
-DEF_CVT_FUNC(32s16u, int, ushort)
-DEF_CVT_FUNC(32f16u, float, ushort)
+DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
+DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
+DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
 DEF_CVT_FUNC(64f16u, double, ushort)

-DEF_CVT_FUNC(8u16s, uchar, short)
-DEF_CVT_FUNC(8s16s, schar, short)
-DEF_CVT_FUNC(16u16s, ushort, short)
-DEF_CVT_FUNC(32s16s, int, short)
-DEF_CVT_FUNC(32f16s, float, short)
+DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R)
+DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R)
+DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
+DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
+DEF_CVT_FUNC_F2(32f16s, float, short, 32f16s_C1RSfs)
 DEF_CVT_FUNC(64f16s, double, short)

-DEF_CVT_FUNC(8u32s, uchar, int)
-DEF_CVT_FUNC(8s32s, schar, int)
-DEF_CVT_FUNC(16u32s, ushort, int)
-DEF_CVT_FUNC(16s32s, short, int)
+DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R)
+DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R)
+DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
+DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
 DEF_CPY_FUNC(32s, int)
-DEF_CVT_FUNC(32f32s, float, int)
+DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
 DEF_CVT_FUNC(64f32s, double, int)

-DEF_CVT_FUNC(8u32f, uchar, float)
-DEF_CVT_FUNC(8s32f, schar, float)
-DEF_CVT_FUNC(16u32f, ushort, float)
-DEF_CVT_FUNC(16s32f, short, float)
-DEF_CVT_FUNC(32s32f, int, float)
+DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R)
+DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R)
+DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
+DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
+DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
 DEF_CVT_FUNC(64f32f, double, float)

 DEF_CVT_FUNC(8u64f, uchar, double)
@@ -1434,7 +1461,7 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)
         Size sz((int)(it.size*cn), 1);

         for( size_t i = 0; i < it.nplanes; i++, ++it )
-            func(ptrs[0], 0, 0, 0, ptrs[1], 0, sz, scale);
+            func(ptrs[0], 1, 0, 0, ptrs[1], 1, sz, scale);
     }
 }

diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp
index 5ac5f22c5..824508fc9 100644
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@@ -495,25 +495,17 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
     else
         kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;

-    Size size = _src.size();
-    int cols = size.width, rows = size.height;
-    if ((cols == 1 && flipType == FLIP_COLS) ||
-        (rows == 1 && flipType == FLIP_ROWS) ||
-        (rows == 1 && cols == 1 && flipType == FLIP_BOTH))
-    {
-        _src.copyTo(_dst);
-        return true;
-    }
-
     ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
         format( "-D T=%s -D T1=%s -D cn=%d", ocl::memopTypeToStr(type),
                 ocl::memopTypeToStr(depth), cn));
     if (k.empty())
         return false;

+    Size size = _src.size();
     _dst.create(size, type);
     UMat src = _src.getUMat(), dst = _dst.getUMat();

+    int cols = size.width, rows = size.height;
     cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols;
     rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows;

@@ -531,13 +523,59 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
 void flip( InputArray _src, OutputArray _dst, int flip_mode )
 {
     CV_Assert( _src.dims() <= 2 );
+    Size size = _src.size();

-    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src,_dst, flip_mode))
+    if (flip_mode < 0)
+    {
+        if (size.width == 1)
+            flip_mode = 0;
+        if (size.height == 1)
+            flip_mode = 1;
+    }
+
+    if ((size.width == 1 && flip_mode > 0) ||
+        (size.height == 1 && flip_mode == 0) ||
+        (size.height == 1 && size.width == 1 && flip_mode < 0))
+    {
+        return _src.copyTo(_dst);
+    }
+
+    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode))

     Mat src = _src.getMat();
-    _dst.create( src.size(), src.type() );
+    int type = src.type();
+    _dst.create( size, type );
     Mat dst = _dst.getMat();
-    size_t esz = src.elemSize();
+    size_t esz = CV_ELEM_SIZE(type);
+
+#ifdef HAVE_IPP
+    typedef IppStatus (CV_STDCALL * ippiMirror)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize, IppiAxis flip);
+    ippiMirror ippFunc =
+        type == CV_8UC1 ? (ippiMirror)ippiMirror_8u_C1R :
+        type == CV_8UC3 ? (ippiMirror)ippiMirror_8u_C3R :
+        type == CV_8UC4 ? (ippiMirror)ippiMirror_8u_C4R :
+        type == CV_16UC1 ? (ippiMirror)ippiMirror_16u_C1R :
+        type == CV_16UC3 ? (ippiMirror)ippiMirror_16u_C3R :
+        type == CV_16UC4 ? (ippiMirror)ippiMirror_16u_C4R :
+        type == CV_16SC1 ? (ippiMirror)ippiMirror_16s_C1R :
+        type == CV_16SC3 ? (ippiMirror)ippiMirror_16s_C3R :
+        type == CV_16SC4 ? (ippiMirror)ippiMirror_16s_C4R :
+        type == CV_32SC1 ? (ippiMirror)ippiMirror_32s_C1R :
+        type == CV_32SC3 ? (ippiMirror)ippiMirror_32s_C3R :
+        type == CV_32SC4 ? (ippiMirror)ippiMirror_32s_C4R :
+        type == CV_32FC1 ? (ippiMirror)ippiMirror_32f_C1R :
+        type == CV_32FC3 ? (ippiMirror)ippiMirror_32f_C3R :
+        type == CV_32FC4 ? (ippiMirror)ippiMirror_32f_C4R : 0;
+    IppiAxis axis = flip_mode == 0 ? ippAxsHorizontal :
+        flip_mode > 0 ? ippAxsVertical : ippAxsBoth;
+
+    if (ippFunc != 0)
+    {
+        IppStatus status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, ippiSize(src.cols, src.rows), axis);
+        if (status >= 0)
+            return;
+    }
+#endif

     if( flip_mode <= 0 )
         flipVert( src.data, src.step, dst.data, dst.step, src.size(), esz );
diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp
index 12ba4fa5b..376cbadae 100644
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@@ -238,6 +238,12 @@ float cubeRoot( float value )

 static void Magnitude_32f(const float* x, const float* y, float* mag, int len)
 {
+#ifdef HAVE_IPP
+    IppStatus status = ippsMagnitude_32f(x, y, mag, len);
+    if (status >= 0)
+        return;
+#endif
+
     int i = 0;

 #if CV_SSE
@@ -264,6 +270,12 @@ static void Magnitude_64f(const double* x, const double* y, double* mag, int len)
 {
+#ifdef HAVE_IPP
+    IppStatus status = ippsMagnitude_64f(x, y, mag, len);
+    if (status >= 0)
+        return;
+#endif
+
     int i = 0;

 #if CV_SSE2
@@ -291,6 +303,11 @@ static void Magnitude_64f(const double* x, const double* y, double* mag, int len

 static void InvSqrt_32f(const float* src, float* dst, int len)
 {
+#ifdef HAVE_IPP
+    if (ippsInvSqrt_32f_A21(src, dst, len) >= 0)
+        return;
+#endif
+
     int i = 0;

 #if CV_SSE
@@ -334,6 +351,10 @@ static void InvSqrt_64f(const double* src, double* dst, int len)

 static void Sqrt_32f(const float* src, float* dst, int len)
 {
+#ifdef HAVE_IPP
+    if (ippsSqrt_32f_A21(src, dst, len) >= 0)
+        return;
+#endif
     int i = 0;

 #if CV_SSE
@@ -363,6 +384,11 @@ static void Sqrt_32f(const float* src, float* dst, int len)

 static void Sqrt_64f(const double* src, double* dst, int len)
 {
+#ifdef HAVE_IPP
+    if (ippsSqrt_64f_A50(src, dst, len) >= 0)
+        return;
+#endif
+
     int i = 0;

 #if CV_SSE2
@@ -729,6 +755,22 @@ void polarToCart( InputArray src1, InputArray src2,
     dst2.create( Angle.dims, Angle.size, type );
     Mat X = dst1.getMat(), Y = dst2.getMat();

+#ifdef HAVE_IPP
+    if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
+    {
+        typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
+                                                         void * pDstRe, void * pDstIm, int len);
+        ippsPolarToCart ippFunc =
+            depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f :
+            depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0;
+        CV_Assert(ippFunc != 0);
+
+        IppStatus status = ippFunc(Mag.data, Angle.data, X.data, Y.data, static_cast<int>(cn * X.total()));
+        if (status >= 0)
+            return;
+    }
+#endif
+
     const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
     uchar* ptrs[4];
     NAryMatIterator it(arrays, ptrs);
@@ -2119,6 +2161,29 @@ void pow( InputArray _src, double power, OutputArray _dst )
         _src.copyTo(_dst);
         return;
     case 2:
+#ifdef HAVE_IPP
+        if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) || (_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) ))
+        {
+            Mat src = _src.getMat();
+            _dst.create( src.dims, src.size, type );
+            Mat dst = _dst.getMat();
+
+            Size size = src.size();
+            int srcstep = (int)src.step, dststep = (int)dst.step, esz = CV_ELEM_SIZE(type);
+            if (src.isContinuous() && dst.isContinuous())
+            {
+                size.width = (int)src.total();
+                size.height = 1;
+                srcstep = dststep = (int)src.total() * esz;
+            }
+            size.width *= cn;
+
+            IppStatus status = ippiSqr_32f_C1R((const Ipp32f *)src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+            if (status >= 0)
+                return;
+        }
+#endif
         if (same)
             multiply(_dst, _dst, _dst);
         else
@@ -2168,6 +2233,18 @@ void pow( InputArray _src, double power, OutputArray _dst )
     }
     else
     {
+#ifdef HAVE_IPP
+        if (src.isContinuous() && dst.isContinuous())
+        {
+            IppStatus status = depth == CV_32F ?
+                ippsPowx_32f_A21((const Ipp32f *)src.data, (Ipp32f)power, (Ipp32f*)dst.data, (Ipp32s)(src.total() * cn)) :
+                ippsPowx_64f_A50((const Ipp64f *)src.data, power, (Ipp64f*)dst.data, (Ipp32s)(src.total() * cn));
+
+            if (status >= 0)
+                return;
+        }
+#endif
+
         int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
         size_t esz1 = src.elemSize1();

diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp
index 8891bb05f..23735194d 100644
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@@ -2212,7 +2212,7 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray

     Mat src1 = _src1.getMat(), src2 = _src2.getMat();
     CV_Assert(src1.size == src2.size);
-    _dst.create(src1.dims, src1.size, src1.type());
+    _dst.create(src1.dims, src1.size, type);
     Mat dst = _dst.getMat();

     float falpha = (float)alpha;
@@ -2220,9 +2220,16 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray

     ScaleAddFunc func = depth == CV_32F ? (ScaleAddFunc)scaleAdd_32f : (ScaleAddFunc)scaleAdd_64f;

-    if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() )
+    if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous())
     {
         size_t len = src1.total()*cn;
+#if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY
+        if (depth == CV_32F &&
+            ippmSaxpy_vava_32f((const Ipp32f *)src1.data, (int)src1.step, sizeof(Ipp32f), falpha,
+                               (const Ipp32f *)src2.data, (int)src2.step, sizeof(Ipp32f),
+                               (Ipp32f *)dst.data, (int)dst.step, sizeof(Ipp32f), (int)len, 1) >= 0)
+            return;
+#endif
         func(src1.data, src2.data, dst.data, (int)len, palpha);
         return;
     }
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index 45ae3d512..7e2976e43 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -2967,6 +2967,30 @@ void cv::transpose( InputArray _src, OutputArray _dst )
         return;
     }

+#ifdef HAVE_IPP
+    typedef IppStatus (CV_STDCALL * ippiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize);
+    ippiTranspose ippFunc =
+        type == CV_8UC1 ? (ippiTranspose)ippiTranspose_8u_C1R :
+        type == CV_8UC3 ? (ippiTranspose)ippiTranspose_8u_C3R :
+        type == CV_8UC4 ? (ippiTranspose)ippiTranspose_8u_C4R :
+        type == CV_16UC1 ? (ippiTranspose)ippiTranspose_16u_C1R :
+        type == CV_16UC3 ? (ippiTranspose)ippiTranspose_16u_C3R :
+        type == CV_16UC4 ? (ippiTranspose)ippiTranspose_16u_C4R :
+        type == CV_16SC1 ? (ippiTranspose)ippiTranspose_16s_C1R :
+        type == CV_16SC3 ? (ippiTranspose)ippiTranspose_16s_C3R :
+        type == CV_16SC4 ? (ippiTranspose)ippiTranspose_16s_C4R :
+        type == CV_32SC1 ? (ippiTranspose)ippiTranspose_32s_C1R :
+        type == CV_32SC3 ? (ippiTranspose)ippiTranspose_32s_C3R :
+        type == CV_32SC4 ? (ippiTranspose)ippiTranspose_32s_C4R :
+        type == CV_32FC1 ? (ippiTranspose)ippiTranspose_32f_C1R :
+        type == CV_32FC3 ? (ippiTranspose)ippiTranspose_32f_C3R :
+        type == CV_32FC4 ? (ippiTranspose)ippiTranspose_32f_C4R : 0;
+
+    IppiSize roiSize = { src.cols, src.rows };
+    if (ippFunc != 0 && ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, roiSize) >= 0)
+        return;
+#endif
+
     if( dst.data == src.data )
     {
         TransposeInplaceFunc func = transposeInplaceTab[esz];
diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp
index 0e3d44ed6..c71745957 100644
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -933,10 +933,10 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
             dcn_stddev = (int)stddev.total();
             pstddev = (Ipp64f *)stddev.data;
         }
-        for( int k = cn; k < dcn_mean; k++ )
-            pmean[k] = 0;
-        for( int k = cn; k < dcn_stddev; k++ )
-            pstddev[k] = 0;
+        for( int c = cn; c < dcn_mean; c++ )
+            pmean[c] = 0;
+        for( int c = cn; c < dcn_stddev; c++ )
+            pstddev[c] = 0;
         IppiSize sz = { cols, rows };
         int type = src.type();
         if( !mask.empty() )
@@ -2016,6 +2016,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
     size_t total_size = src.total();
     int rows = src.size[0], cols = (int)(total_size/rows);
+
     if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
         && cols > 0 && (size_t)rows*cols == total_size
         && (normType == NORM_INF || normType == NORM_L1 ||
diff --git a/modules/imgproc/src/accum.cpp b/modules/imgproc/src/accum.cpp
index f130f34da..216ddcb39 100644
--- a/modules/imgproc/src/accum.cpp
+++ b/modules/imgproc/src/accum.cpp
@@ -457,6 +457,56 @@ void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _m

     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

+#ifdef HAVE_IPP
+    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddSquare)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
+        typedef IppStatus (CV_STDCALL * ippiAddSquareMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
+                                                           int srcDstStep, IppiSize roiSize);
+        ippiAddSquare ippFunc = 0;
+        ippiAddSquareMask ippFuncMask = 0;
+
+        if (mask.empty())
+        {
+            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_8u32f_C1IR :
+                      sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_16u32f_C1IR :
+                      sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_32f_C1IR : 0;
+        }
+        else if (scn == 1)
+        {
+            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_8u32f_C1IMR :
+                          sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_16u32f_C1IMR :
+                          sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_32f_C1IMR : 0;
+        }
+
+        if (ippFunc || ippFuncMask)
+        {
+            IppStatus status = ippStsNoErr;
+
+            Size size = src.size();
+            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
+            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
+            {
+                srcstep = static_cast<int>(src.total() * src.elemSize());
+                dststep = static_cast<int>(dst.total() * dst.elemSize());
+                maskstep = static_cast<int>(mask.total() * mask.elemSize());
+                size.width = static_cast<int>(src.total());
+                size.height = 1;
+            }
+            size.width *= scn;
+
+            if (mask.empty())
+                status = ippFunc(src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+            else
+                status = ippFuncMask(src.data, srcstep, (Ipp8u *)mask.data, maskstep,
+                                     (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
     int fidx = getAccTabIdx(sdepth, ddepth);
     AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0;
     CV_Assert( func != 0 );
@@ -485,6 +535,59 @@ void cv::accumulateProduct( InputArray _src1, InputArray _src2,

     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

+#ifdef HAVE_IPP
+    if (src1.dims <= 2 || (src1.isContinuous() && src2.isContinuous() && dst.isContinuous()))
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddProduct)(const void * pSrc1, int src1Step, const void * pSrc2,
+                                                        int src2Step, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
+        typedef IppStatus (CV_STDCALL * ippiAddProductMask)(const void * pSrc1, int src1Step, const void * pSrc2, int src2Step,
+                                                            const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
+        ippiAddProduct ippFunc = 0;
+        ippiAddProductMask ippFuncMask = 0;
+
+        if (mask.empty())
+        {
+            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_8u32f_C1IR :
+                      sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_16u32f_C1IR :
+                      sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_32f_C1IR : 0;
+        }
+        else if (scn == 1)
+        {
+            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_8u32f_C1IMR :
+                          sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_16u32f_C1IMR :
+                          sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_32f_C1IMR : 0;
+        }
+
+        if (ippFunc || ippFuncMask)
+        {
+            IppStatus status = ippStsNoErr;
+
+            Size size = src1.size();
+            int src1step = (int)src1.step, src2step = (int)src2.step, dststep = (int)dst.step, maskstep = (int)mask.step;
+            if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous() && mask.isContinuous())
+            {
+                src1step = static_cast<int>(src1.total() * src1.elemSize());
+                src2step = static_cast<int>(src2.total() * src2.elemSize());
+                dststep = static_cast<int>(dst.total() * dst.elemSize());
+                maskstep = static_cast<int>(mask.total() * mask.elemSize());
+                size.width = static_cast<int>(src1.total());
+                size.height = 1;
+            }
+            size.width *= scn;
+
+            if (mask.empty())
+                status = ippFunc(src1.data, src1step, src2.data, src2step, (Ipp32f *)dst.data,
+                                 dststep, ippiSize(size.width, size.height));
+            else
+                status = ippFuncMask(src1.data, src1step, src2.data, src2step, (Ipp8u *)mask.data, maskstep,
+                                     (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
     int fidx = getAccTabIdx(sdepth, ddepth);
     AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0;
     CV_Assert( func != 0 );
@@ -512,6 +615,58 @@ void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,

     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

+#ifdef HAVE_IPP
+    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && mask.isContinuous()))
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddWeighted)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep,
+                                                         IppiSize roiSize, Ipp32f alpha);
+        typedef IppStatus (CV_STDCALL * ippiAddWeightedMask)(const void * pSrc, int srcStep, const Ipp8u * pMask,
+                                                             int maskStep, Ipp32f * pSrcDst,
+                                                             int srcDstStep, IppiSize roiSize, Ipp32f alpha);
+        ippiAddWeighted ippFunc = 0;
+        ippiAddWeightedMask ippFuncMask = 0;
+
+        if (mask.empty())
+        {
+            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_8u32f_C1IR :
+                      sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_16u32f_C1IR :
+                      sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_32f_C1IR : 0;
+        }
+        else if (scn == 1)
+        {
+            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_8u32f_C1IMR :
+                          sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_16u32f_C1IMR :
+                          sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_32f_C1IMR : 0;
+        }
+
+        if (ippFunc || ippFuncMask)
+        {
+            IppStatus status = ippStsNoErr;
+
+            Size size = src.size();
+            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
+            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
+            {
+                srcstep = static_cast<int>(src.total() * src.elemSize());
+                dststep = static_cast<int>(dst.total() * dst.elemSize());
+                maskstep = static_cast<int>(mask.total() * mask.elemSize());
+                size.width = static_cast<int>((int)src.total());
+                size.height = 1;
+            }
+            size.width *= scn;
+
+            if (mask.empty())
+                status = ippFunc(src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
+            else
+                status = ippFuncMask(src.data, srcstep, (Ipp8u *)mask.data, maskstep,
+                                     (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
+
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
     int fidx = getAccTabIdx(sdepth, ddepth);
     AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0;
     CV_Assert( func != 0 );
diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index 8ab7e4929..bded52839 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -298,7 +298,7 @@ static ippiReorderFunc ippiSwapChannelsC3RTab[] =
     0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
 };

-#if (IPP_VERSION_X100 >= 801)
+#if IPP_VERSION_X100 >= 801
 static ippiReorderFunc ippiSwapChannelsC4RTab[] =
 {
     (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
@@ -3315,7 +3315,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
             CV_Assert( scn == 3 || scn == 4 );
             _dst.create(sz, CV_MAKETYPE(depth, 1));
             dst = _dst.getMat();
-/**/
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 /*
             if( code == CV_BGR2GRAY )
@@ -3341,7 +3341,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                 return;
             }
 #endif
-/**/
+
             bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;

             if( depth == CV_8U )
diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 0c7aafc7b..344601bf2 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -61,9 +61,9 @@ namespace cv
     typedef IppStatus (CV_STDCALL* ippiResizeGetSrcOffset)(void*, IppiPoint, IppiPoint*);
 #endif

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) && 0
     typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
-    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
+    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
     typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);

    template <int channels, typename Type>
@@ -75,7 +75,7 @@ namespace cv
         return func(values, dataPointer, step, size) >= 0;
     }

-    bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
+    static bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
     {
         if( channels == 1 )
         {
@@ -3892,11 +3892,11 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
 namespace cv
 {

-class warpAffineInvoker :
+class WarpAffineInvoker :
     public ParallelLoopBody
 {
 public:
-    warpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
+    WarpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
                       const Scalar &_borderValue, int *_adelta, int *_bdelta, double *_M) :
         ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
         borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
@@ -4013,16 +4013,20 @@ private:
     double *M;
 };

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-class IPPwarpAffineInvoker :
+
+/*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+class IPPWarpAffineInvoker :
     public ParallelLoopBody
 {
 public:
-    IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
-        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
-    {
-        *ok = true;
-    }
+    IPPWarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int _borderType,
+                         const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
+        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
+        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+    {
+        *ok = true;
+    }

     virtual void operator() (const Range& range) const
     {
@@ -4040,21 +4044,26 @@ public:
                 return;
             }
         }
-        if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+
+        // Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+        IppStatus status = func( src.data, srcsize, (int)src.step[0], srcroi, dst.data,
+                                 (int)dst.step[0], dstroi, coeffs, mode );
+        if( status < 0)
             *ok = false;
     }

 private:
     Mat &src;
     Mat &dst;
-    double (&coeffs)[2][3];
     int mode;
+    double (&coeffs)[2][3];
     int borderType;
     Scalar borderValue;
     ippiWarpAffineBackFunc func;
     bool *ok;
-    const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
+    const IPPWarpAffineInvoker& operator= (const IPPWarpAffineInvoker&);
 };
 #endif
+*/

 #ifdef HAVE_OPENCL
@@ -4204,16 +4213,19 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
     int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
     const int AB_BITS = MAX(10, (int)INTER_BITS);
     const int AB_SCALE = 1 << AB_BITS;
-/*
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    int depth = src.depth();
-    int channels = src.channels();
+
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+    int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
     if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
-        ( channels == 1 || channels == 3 || channels == 4 ) &&
-        ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
+        ( cn == 1 || cn == 3 || cn == 4 ) &&
+        ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC) &&
+        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT) )
     {
-        int type = src.type();
-        ippiWarpAffineBackFunc ippFunc =
+        ippiWarpAffineBackFunc ippFunc = 0;
+        if ((flags & WARP_INVERSE_MAP) != 0)
+        {
+            ippFunc =
             type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
             type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
             type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
@@ -4224,31 +4236,43 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
             type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
             type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
             type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
             type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
             type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
             type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
             0;
-        int mode =
-            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
-            flags == INTER_NEAREST ? IPPI_INTER_NN :
-            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
-            0;
-        if( mode && ippFunc )
-        {
-            double coeffs[2][3];
-            for( int i = 0; i < 2; i++ )
-            {
-                for( int j = 0; j < 3; j++ )
-                {
-                    coeffs[i][j] = matM.at<double>(i, j);
-                }
-            }
-            bool ok;
-            Range range(0, dst.rows);
-            IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
-            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
-            if( ok )
-                return;
         }
+        else
+        {
+            ippFunc =
+                type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C1R :
+                type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C3R :
+                type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C4R :
+                type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C1R :
+                type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C3R :
+                type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C4R :
+                type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C1R :
+                type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C3R :
+                type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C4R :
+                0;
+        }
+        int mode =
+            interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            interpolation == INTER_NEAREST ? IPPI_INTER_NN :
+            interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC :
+            0;
+        CV_Assert(mode && ippFunc);
+
+        double coeffs[2][3];
+        for( int i = 0; i < 2; i++ )
+            for( int j = 0; j < 3; j++ )
+                coeffs[i][j] = matM.at<double>(i, j);
+
+        bool ok;
+        Range range(0, dst.rows);
+        IPPWarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
     }
 #endif
-*/
+    */
+
     for( x = 0; x < dst.cols; x++ )
     {
         adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@@ -4256,7 +4280,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
         bdelta[x] = saturate_cast<int>(M[3]*x*AB_SCALE);
     }
     Range range(0, dst.rows);
-    warpAffineInvoker invoker(src, dst, interpolation, borderType,
+    WarpAffineInvoker invoker(src, dst, interpolation, borderType,
                               borderValue, adelta, bdelta, M);
     parallel_for_(range, invoker, dst.total()/(double)(1<<16));
 }
@@ -4265,12 +4289,12 @@

 namespace cv
 {

-class warpPerspectiveInvoker :
+class WarpPerspectiveInvoker :
     public ParallelLoopBody
 {
 public:
-    warpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
+    WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
                            int _borderType, const Scalar &_borderValue) :
         ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
         borderType(_borderType), borderValue(_borderValue)
@@ -4356,16 +4380,19 @@ private:
     Scalar borderValue;
 };

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-class IPPwarpPerspectiveInvoker :
+/*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+class IPPWarpPerspectiveInvoker :
     public ParallelLoopBody
 {
 public:
-    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
-        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
-    {
-        *ok = true;
-    }
+    IPPWarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation,
+                              int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveFunc _func, bool *_ok) :
+        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
+        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+    {
+        *ok = true;
+    }

     virtual void operator() (const Range& range) const
     {
@@ -4384,22 +4411,25 @@ public:
                 return;
             }
         }
-        if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
+
+        IppStatus status = func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode);
+        if (status != ippStsNoErr)
             *ok = false;
     }
 private:
     Mat &src;
     Mat &dst;
-    double (&coeffs)[3][3];
     int mode;
+    double (&coeffs)[3][3];
     int borderType;
     const Scalar borderValue;
-    ippiWarpPerspectiveBackFunc func;
+    ippiWarpPerspectiveFunc func;
     bool *ok;
-    const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
+
+    const IPPWarpPerspectiveInvoker& operator= (const IPPWarpPerspectiveInvoker&);
 };
 #endif
-
+    */
 }

 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
@@ -4432,55 +4462,65 @@
         return;
 #endif

-    if( !(flags & WARP_INVERSE_MAP) )
-        invert(matM, matM);
-/*
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    int depth = src.depth();
-    int channels = src.channels();
-    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
-        ( channels == 1 || channels == 3 || channels == 4 ) &&
-        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+    int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    if( (depth == CV_8U || depth == CV_16U || depth == CV_32F) &&
+        (cn == 1 || cn == 3 || cn == 4) &&
+        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) &&
+        (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC))
     {
-        int type = src.type();
-        ippiWarpPerspectiveBackFunc ippFunc =
-            type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
-            type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
-            type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
-            type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
-            type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
-            type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
-            type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
-            type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
-            type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
-            0;
-        int mode =
-            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
-            flags == INTER_NEAREST ? IPPI_INTER_NN :
-            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
-            0;
-        if( mode && ippFunc )
+        ippiWarpPerspectiveFunc ippFunc = 0;
+        if ((flags & WARP_INVERSE_MAP) != 0)
         {
-            double coeffs[3][3];
-            for( int i = 0; i < 3; i++ )
-            {
-                for( int j = 0; j < 3; j++ )
-                {
-                    coeffs[i][j] = matM.at<double>(i, j);
-                }
-            }
-            bool ok;
-            Range range(0, dst.rows);
-            IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
-            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
-            if( ok )
-                return;
+            ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C1R :
+                      type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C3R :
+                      type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C4R :
+                      type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C1R :
+                      type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C3R :
+                      type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C4R :
+                      type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C1R :
+                      type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C3R :
+                      type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C4R : 0;
         }
+        else
+        {
+            ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C1R :
+                      type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C3R :
+                      type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C4R :
+                      type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C1R :
+                      type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C3R :
+                      type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C4R :
+                      type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C1R :
+                      type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C3R :
+                      type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C4R : 0;
+        }
+        int mode =
+            interpolation == INTER_NEAREST ? IPPI_INTER_NN :
+            interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : 0;
+        CV_Assert(mode && ippFunc);
+
+        double coeffs[3][3];
+        for( int i = 0; i < 3; i++ )
+            for( int j = 0; j < 3; j++ )
+                coeffs[i][j] = matM.at<double>(i, j);
+
+        bool ok;
+        Range range(0, dst.rows);
+        IPPWarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
     }
 #endif
-*/
+    */
+
+    if( !(flags & WARP_INVERSE_MAP) )
+        invert(matM, matM);
+
     Range range(0, dst.rows);
-    warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
+    WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
     parallel_for_(range, invoker, dst.total()/(double)(1<<16));
 }

diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index 84570bd2c..f0f5411e8 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -841,7 +841,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
                ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize))

     Mat src = _src.getMat();
-    int sdepth = src.depth(), cn = src.channels();
+    int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
     if( ddepth < 0 )
         ddepth = sdepth;
     _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
@@ -858,6 +858,69 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
         return;
 #endif

+#ifdef HAVE_IPP
+    int ippBorderType = borderType & ~BORDER_ISOLATED;
+    Point ocvAnchor, ippAnchor;
+    ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
+    ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
+    ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
+    ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);
+
+    if (normalize && !src.isSubmatrix() && ddepth == sdepth &&
+        (ippBorderType == BORDER_REPLICATE || ippBorderType == BORDER_CONSTANT) &&
+        ocvAnchor == ippAnchor )
+    {
+        Ipp32s bufSize;
+        IppiSize roiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize.width, ksize.height);
+
+#define IPP_FILTER_BOX_BORDER(ippType, ippDataType, flavor) \
+        do \
+        { \
+            if (ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippDataType, cn, &bufSize) >= 0) \
+            { \
+                Ipp8u * buffer = ippsMalloc_8u(bufSize); \
+                ippType borderValue[4] = { 0, 0, 0, 0 }; \
+                ippBorderType = ippBorderType == BORDER_CONSTANT ? ippBorderConst : ippBorderType == BORDER_REPLICATE ? ippBorderRepl : -1; \
+                CV_Assert(ippBorderType >= 0); \
+                IppStatus status = ippiFilterBoxBorder_##flavor((ippType *)src.data, (int)src.step, (ippType *)dst.data, (int)dst.step, roiSize, maskSize, \
+                                                                (IppiBorderType)ippBorderType, borderValue, buffer); \
+                ippsFree(buffer); \
+                if (status >= 0) \
+                    return; \
+            } \
+        } while ((void)0, 0)
+
+        if (stype == CV_8UC1)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C1R);
+        else if (stype == CV_8UC3)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C3R);
+        else if (stype == CV_8UC4)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C4R);
+
+        else if (stype == CV_16UC1)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C1R);
+        else if (stype == CV_16UC3)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C3R);
+        else if (stype == CV_16UC4)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C4R);
+
+        else if (stype == CV_16SC1)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C1R);
+        else if (stype == CV_16SC3)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C3R);
+        else if (stype == CV_16SC4)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C4R);
+
+        else if (stype == CV_32FC1)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C1R);
+        else if (stype == CV_32FC3)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C3R);
+        else if (stype == CV_32FC4)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C4R);
+    }
+#undef IPP_FILTER_BOX_BORDER
+#endif
+
     Ptr<FilterEngine> f = createBoxFilter( src.type(), dst.type(), ksize, anchor, normalize, borderType );
     f->apply( src, dst );
 }
@@ -1948,13 +2011,46 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
         return;
     }

-    CV_OCL_RUN(_src0.dims() <= 2 && _dst.isUMat(),
+    CV_OCL_RUN(_dst.isUMat(),
               ocl_medianFilter(_src0,_dst, ksize))

     Mat src0 = _src0.getMat();
     _dst.create( src0.size(), src0.type() );
     Mat dst = _dst.getMat();

+#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 8 && IPP_VERSION_MINOR >= 1
+#define IPP_FILTER_MEDIAN_BORDER(ippType, ippDataType, flavor) \
+    do \
+    { \
+        if (ippiFilterMedianBorderGetBufferSize(dstRoiSize, maskSize, \
+            ippDataType, CV_MAT_CN(type), &bufSize) >= 0) \
+        { \
+            Ipp8u * buffer = ippsMalloc_8u(bufSize); \
+            IppStatus status = ippiFilterMedianBorder_##flavor((const ippType *)src0.data, (int)src0.step, \
+                (ippType *)dst.data, (int)dst.step, dstRoiSize, maskSize, \
+                ippBorderRepl, (ippType)0, buffer); \
+            ippsFree(buffer); \
+            if (status >= 0) \
+                return; \
+        } \
+    } \
+    while ((void)0, 0)
+
+    Ipp32s bufSize;
+    IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize);
+
+    int type = src0.type();
+    if (type == CV_8UC1)
+        IPP_FILTER_MEDIAN_BORDER(Ipp8u, ipp8u, 8u_C1R);
+    else if (type == CV_16UC1)
+        IPP_FILTER_MEDIAN_BORDER(Ipp16u, ipp16u, 16u_C1R);
+    else if (type == CV_16SC1)
+        IPP_FILTER_MEDIAN_BORDER(Ipp16s, ipp16s, 16s_C1R);
+    else if (type == CV_32FC1)
+        IPP_FILTER_MEDIAN_BORDER(Ipp32f, ipp32f, 32f_C1R);
+#undef IPP_FILTER_MEDIAN_BORDER
+#endif
+
 #ifdef HAVE_TEGRA_OPTIMIZATION
     if (tegra::medianBlur(src0, dst, ksize))
         return;
diff --git a/modules/video/src/motempl.cpp b/modules/video/src/motempl.cpp
index 3fc87e657..4dfe5d7fc 100644
--- a/modules/video/src/motempl.cpp
+++ b/modules/video/src/motempl.cpp
@@ -80,13 +80,27 @@ void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi,
     Mat silh = _silhouette.getMat(), mhi = _mhi.getMat();
     Size size = silh.size();

+#ifdef HAVE_IPP
+    int silhstep = (int)silh.step, mhistep = (int)mhi.step;
+#endif
     if( silh.isContinuous() && mhi.isContinuous() )
     {
         size.width *= size.height;
        size.height = 1;
+#ifdef HAVE_IPP
+        silhstep = (int)silh.total();
+        mhistep = (int)mhi.total() * sizeof(Ipp32f);
+#endif
     }

+#ifdef HAVE_IPP
+    IppStatus status = ippiUpdateMotionHistory_8u32f_C1IR((const Ipp8u *)silh.data, silhstep, (Ipp32f *)mhi.data, mhistep,
+                                                          ippiSize(size.width, size.height), (Ipp32f)timestamp, (Ipp32f)duration);
+    if (status >= 0)
+        return;
+#endif
+
 #if CV_SSE2
     volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2);
 #endif