Merge pull request #2583 from ilya-lavrenov:ippicv

2014-04-09 14:14:13 +04:00 · 2014-04-09 14:14:13 +04:00 · e013f04669
commit e013f04669
parent 96484e1ad7 444ab0ef0d
14 changed files with 675 additions and 181 deletions
--- a/cmake/OpenCVFindIPP.cmake
+++ b/cmake/OpenCVFindIPP.cmake
@ -156,6 +156,7 @@ macro(ipp_set_variables _LATEST_VERSION)
  set(IPPCC   "cc")       # color conversion
  set(IPPCV   "cv")       # computer vision
  set(IPPVM   "vm")       # vector math
+  set(IPPM    "m")        # matrix math

  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPVM}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCC}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
@ -163,6 +164,9 @@ macro(ipp_set_variables _LATEST_VERSION)
  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPI}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPS}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
  list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPCORE}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  if(NOT HAVE_IPP_ICV_ONLY)  # the matrix math (ippm) library is linked only when HAVE_IPP_ICV_ONLY is not set
+    list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${IPPM}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
+  endif()

 # FIXIT
 #  if(UNIX AND NOT HAVE_IPP_ICV_ONLY)
@ -177,12 +181,16 @@ macro(ipp_set_variables _LATEST_VERSION)
      if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
        message(SEND_ERROR "Intel compiler EM64T libraries not found")
      endif()
-      set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
+      if(NOT APPLE)
+        set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/intel64)
+      endif()
    else()
      if(NOT EXISTS ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
        message(SEND_ERROR "Intel compiler IA32 libraries not found")
      endif()
-      set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
+      if (NOT APPLE)
+        set(INTEL_COMPILER_LIBRARY_DIR ${INTEL_COMPILER_LIBRARY_DIR}/ia32)
+      endif()
    endif()
    list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}irc${CMAKE_SHARED_LIBRARY_SUFFIX})
    list(APPEND IPP_LIBRARIES ${INTEL_COMPILER_LIBRARY_DIR}/${IPP_LIB_PREFIX}imf${CMAKE_SHARED_LIBRARY_SUFFIX})
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@ -223,6 +223,13 @@ static inline IppiSize ippiSize(int width, int height)
    IppiSize size = { width, height };
    return size;
 }
+
+static inline IppiSize ippiSize(const cv::Size & _size)
+{
+    // Same conversion as the (width, height) overload, with the dimensions taken from a cv::Size.
+    return ippiSize(_size.width, _size.height);
+}
+
 #else
 #  define IPP_VERSION_X100 0
 #endif
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@ -460,7 +460,7 @@ static void add8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
        return;
 #endif
    (vBinOp<uchar, OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@ -479,7 +479,7 @@ static void add16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
        return;
 #endif
    (vBinOp<ushort, OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@ -491,7 +491,7 @@ static void add16s( const short* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz), 0))
        return;
 #endif
    (vBinOp<short, OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, sz));
@ -510,7 +510,7 @@ static void add32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp32<float, OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, sz));
@ -529,7 +529,7 @@ static void sub8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
        return;
 #endif
    (vBinOp<uchar, OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@ -548,7 +548,7 @@ static void sub16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
        return;
 #endif
    (vBinOp<ushort, OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@ -560,7 +560,7 @@ static void sub16s( const short* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0))
+    if (0 <= ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz), 0))
        return;
 #endif
    (vBinOp<short, OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, sz));
@ -579,7 +579,7 @@ static void sub32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp32<float, OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, sz));
@ -801,7 +801,7 @@ static void absdiff8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp<uchar, OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@ -820,7 +820,7 @@ static void absdiff16u( const ushort* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp<ushort, OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, sz));
@ -846,7 +846,7 @@ static void absdiff32f( const float* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp32<float, OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, sz));
@ -866,7 +866,7 @@ static void and8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp<uchar, OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@ -878,7 +878,7 @@ static void or8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp<uchar, OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@ -890,7 +890,7 @@ static void xor8u( const uchar* src1, size_t step1,
 {
 #if (ARITHM_USE_IPP == 1)
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
-    if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz))
+    if (0 <= ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp<uchar, OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@ -901,8 +901,8 @@ static void not8u( const uchar* src1, size_t step1,
                   uchar* dst, size_t step, Size sz, void* )
 {
 #if (ARITHM_USE_IPP == 1)
-    fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2;
-    if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz))
+    fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void)src2;
+    if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, ippiSize(sz)))
        return;
 #endif
    (vBinOp<uchar, OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, sz));
@ -2386,7 +2386,7 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
    if( op  >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
            return;
    }
 #endif
@ -2469,7 +2469,7 @@ static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t
    if( op  >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
            return;
    }
 #endif
@ -2484,7 +2484,7 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
    if( op  > 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
            return;
    }
 #endif
@ -2590,7 +2590,7 @@ static void cmp32f(const float* src1, size_t step1, const float* src2, size_t st
    if( op  >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
-        if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op))
+        if (0 <= ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(size), op))
            return;
    }
 #endif
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@ -1079,6 +1079,33 @@ dtype* dst, size_t dstep, Size size, double* scale) \
    cvtScale_(src, sstep, dst, dstep, size, (wtype)scale[0], (wtype)scale[1]); \
 }

+#ifdef HAVE_IPP
+/* cvt<suffix>: call the IPP primitive ippiConvert_<ippFavor> first; when it reports
+   an error (negative status) fall back to the generic cvt_() implementation. */
+#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    IppStatus ippStatus = ippiConvert_##ippFavor(src, (int)sstep, dst, (int)dstep, ippiSize(size)); \
+    if (ippStatus < 0) \
+        cvt_(src, sstep, dst, dstep, size); \
+}
+
+/* Same as DEF_CVT_FUNC_F, for IPP flavors that take two extra trailing arguments
+   (passed here as ippRndFinancial and 0). */
+#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    IppStatus ippStatus = ippiConvert_##ippFavor(src, (int)sstep, dst, (int)dstep, ippiSize(size), ippRndFinancial, 0); \
+    if (ippStatus < 0) \
+        cvt_(src, sstep, dst, dstep, size); \
+}
+#else
+/* Without IPP both macros expand to a plain cvt_() wrapper; the ippFavor argument is unused. */
+#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
+static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
+                         dtype* dst, size_t dstep, Size size, double*) \
+{ \
+    cvt_(src, sstep, dst, dstep, size); \
+}
+#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
+#endif

 #define DEF_CVT_FUNC(suffix, stype, dtype) \
 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
@ -1089,7 +1116,7 @@ static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \

 #define DEF_CPY_FUNC(suffix, stype) \
 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
-stype* dst, size_t dstep, Size size, double*) \
+                         stype* dst, size_t dstep, Size size, double*) \
 { \
    cpy_(src, sstep, dst, dstep, size); \
 }
@ -1160,48 +1187,48 @@ DEF_CVT_SCALE_FUNC(32f64f, float, double, double)
 DEF_CVT_SCALE_FUNC(64f,    double, double, double)

 DEF_CPY_FUNC(8u,     uchar)
-DEF_CVT_FUNC(8s8u,   schar, uchar)
-DEF_CVT_FUNC(16u8u,  ushort, uchar)
-DEF_CVT_FUNC(16s8u,  short, uchar)
-DEF_CVT_FUNC(32s8u,  int, uchar)
-DEF_CVT_FUNC(32f8u,  float, uchar)
+DEF_CVT_FUNC_F(8s8u,   schar, uchar, 8s8u_C1Rs)
+DEF_CVT_FUNC_F(16u8u,  ushort, uchar, 16u8u_C1R)
+DEF_CVT_FUNC_F(16s8u,  short, uchar, 16s8u_C1R)
+DEF_CVT_FUNC_F(32s8u,  int, uchar, 32s8u_C1R)
+DEF_CVT_FUNC_F2(32f8u,  float, uchar, 32f8u_C1RSfs)
 DEF_CVT_FUNC(64f8u,  double, uchar)

-DEF_CVT_FUNC(8u8s,   uchar, schar)
-DEF_CVT_FUNC(16u8s,  ushort, schar)
-DEF_CVT_FUNC(16s8s,  short, schar)
-DEF_CVT_FUNC(32s8s,  int, schar)
-DEF_CVT_FUNC(32f8s,  float, schar)
+DEF_CVT_FUNC_F2(8u8s,   uchar, schar, 8u8s_C1RSfs)
+DEF_CVT_FUNC_F2(16u8s,  ushort, schar, 16u8s_C1RSfs)
+DEF_CVT_FUNC_F2(16s8s,  short, schar, 16s8s_C1RSfs)
+DEF_CVT_FUNC_F(32s8s,  int, schar, 32s8s_C1R)
+DEF_CVT_FUNC_F2(32f8s,  float, schar, 32f8s_C1RSfs)
 DEF_CVT_FUNC(64f8s,  double, schar)

-DEF_CVT_FUNC(8u16u,  uchar, ushort)
-DEF_CVT_FUNC(8s16u,  schar, ushort)
+DEF_CVT_FUNC_F(8u16u,  uchar, ushort, 8u16u_C1R)
+DEF_CVT_FUNC_F(8s16u,  schar, ushort, 8s16u_C1Rs)
 DEF_CPY_FUNC(16u,    ushort)
-DEF_CVT_FUNC(16s16u, short, ushort)
-DEF_CVT_FUNC(32s16u, int, ushort)
-DEF_CVT_FUNC(32f16u, float, ushort)
+DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
+DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
+DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
 DEF_CVT_FUNC(64f16u, double, ushort)

-DEF_CVT_FUNC(8u16s,  uchar, short)
-DEF_CVT_FUNC(8s16s,  schar, short)
-DEF_CVT_FUNC(16u16s, ushort, short)
-DEF_CVT_FUNC(32s16s, int, short)
-DEF_CVT_FUNC(32f16s, float, short)
+DEF_CVT_FUNC_F(8u16s,  uchar, short, 8u16s_C1R)
+DEF_CVT_FUNC_F(8s16s,  schar, short, 8s16s_C1R)
+DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
+DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
+DEF_CVT_FUNC_F2(32f16s, float, short, 32f16s_C1RSfs)
 DEF_CVT_FUNC(64f16s, double, short)

-DEF_CVT_FUNC(8u32s,  uchar, int)
-DEF_CVT_FUNC(8s32s,  schar, int)
-DEF_CVT_FUNC(16u32s, ushort, int)
-DEF_CVT_FUNC(16s32s, short, int)
+DEF_CVT_FUNC_F(8u32s,  uchar, int, 8u32s_C1R)
+DEF_CVT_FUNC_F(8s32s,  schar, int, 8s32s_C1R)
+DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
+DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
 DEF_CPY_FUNC(32s,    int)
-DEF_CVT_FUNC(32f32s, float, int)
+DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
 DEF_CVT_FUNC(64f32s, double, int)

-DEF_CVT_FUNC(8u32f,  uchar, float)
-DEF_CVT_FUNC(8s32f,  schar, float)
-DEF_CVT_FUNC(16u32f, ushort, float)
-DEF_CVT_FUNC(16s32f, short, float)
-DEF_CVT_FUNC(32s32f, int, float)
+DEF_CVT_FUNC_F(8u32f,  uchar, float, 8u32f_C1R)
+DEF_CVT_FUNC_F(8s32f,  schar, float, 8s32f_C1R)
+DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
+DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
+DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
 DEF_CVT_FUNC(64f32f, double, float)

 DEF_CVT_FUNC(8u64f,  uchar, double)
@ -1434,7 +1461,7 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)
        Size sz((int)(it.size*cn), 1);

        for( size_t i = 0; i < it.nplanes; i++, ++it )
-            func(ptrs[0], 0, 0, 0, ptrs[1], 0, sz, scale);
+            func(ptrs[0], 1, 0, 0, ptrs[1], 1, sz, scale);
    }
 }

--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@ -495,25 +495,17 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
    else
        kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;

-    Size size = _src.size();
-    int cols = size.width, rows = size.height;
-    if ((cols == 1 && flipType == FLIP_COLS) ||
-            (rows == 1 && flipType == FLIP_ROWS) ||
-            (rows == 1 && cols == 1 && flipType == FLIP_BOTH))
-    {
-        _src.copyTo(_dst);
-        return true;
-    }
-
    ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
        format( "-D T=%s -D T1=%s -D cn=%d", ocl::memopTypeToStr(type),
                ocl::memopTypeToStr(depth), cn));
    if (k.empty())
        return false;

+    Size size = _src.size();
    _dst.create(size, type);
    UMat src = _src.getUMat(), dst = _dst.getUMat();

+    int cols = size.width, rows = size.height;
    cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols;
    rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows;

@ -531,13 +523,59 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
 void flip( InputArray _src, OutputArray _dst, int flip_mode )
 {
    CV_Assert( _src.dims() <= 2 );
+    Size size = _src.size();

-    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src,_dst, flip_mode))
+    // A "flip around both axes" request on a single-column or single-row
+    // array degenerates to a flip around one axis only.
+    if (flip_mode < 0)
+    {
+        if (size.width == 1)
+            flip_mode = 0;
+        if (size.height == 1)
+            flip_mode = 1;
+    }
+
+    // Flipping along an axis of length 1 is the identity, so a plain copy is enough.
+    // A 1x1 "flip both" request has already been remapped to flip_mode == 1 above,
+    // so no separate check for flip_mode < 0 is needed here.
+    if ((size.width == 1 && flip_mode > 0) ||
+        (size.height == 1 && flip_mode == 0))
+    {
+        _src.copyTo(_dst);
+        return;
+    }
+
+    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode))

    Mat src = _src.getMat();
-    _dst.create( src.size(), src.type() );
+    int type = src.type();
+    _dst.create( size, type );
    Mat dst = _dst.getMat();
-    size_t esz = src.elemSize();
+    size_t esz = CV_ELEM_SIZE(type);
+
+#ifdef HAVE_IPP
+    // Pick the IPP mirror primitive matching the array type; 0 means "not supported by this path".
+    typedef IppStatus (CV_STDCALL * ippiMirror)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize, IppiAxis flip);
+    ippiMirror ippFunc =
+        type == CV_8UC1 ? (ippiMirror)ippiMirror_8u_C1R :
+        type == CV_8UC3 ? (ippiMirror)ippiMirror_8u_C3R :
+        type == CV_8UC4 ? (ippiMirror)ippiMirror_8u_C4R :
+        type == CV_16UC1 ? (ippiMirror)ippiMirror_16u_C1R :
+        type == CV_16UC3 ? (ippiMirror)ippiMirror_16u_C3R :
+        type == CV_16UC4 ? (ippiMirror)ippiMirror_16u_C4R :
+        type == CV_16SC1 ? (ippiMirror)ippiMirror_16s_C1R :
+        type == CV_16SC3 ? (ippiMirror)ippiMirror_16s_C3R :
+        type == CV_16SC4 ? (ippiMirror)ippiMirror_16s_C4R :
+        type == CV_32SC1 ? (ippiMirror)ippiMirror_32s_C1R :
+        type == CV_32SC3 ? (ippiMirror)ippiMirror_32s_C3R :
+        type == CV_32SC4 ? (ippiMirror)ippiMirror_32s_C4R :
+        type == CV_32FC1 ? (ippiMirror)ippiMirror_32f_C1R :
+        type == CV_32FC3 ? (ippiMirror)ippiMirror_32f_C3R :
+        type == CV_32FC4 ? (ippiMirror)ippiMirror_32f_C4R : 0;
+    IppiAxis axis = flip_mode == 0 ? ippAxsHorizontal :
+        flip_mode > 0 ? ippAxsVertical : ippAxsBoth;
+
+    // The primitives selected above take separate source and destination images,
+    // so they are used only when src and dst do not share a buffer. In-place
+    // calls fall through to the generic code below.
+    if (ippFunc != 0 && src.data != dst.data)
+    {
+        IppStatus status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, ippiSize(src.cols, src.rows), axis);
+        if (status >= 0)
+            return;
+    }
+#endif

    if( flip_mode <= 0 )
        flipVert( src.data, src.step, dst.data, dst.step, src.size(), esz );
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@ -238,6 +238,12 @@ float  cubeRoot( float value )

 static void Magnitude_32f(const float* x, const float* y, float* mag, int len)
 {
+#ifdef HAVE_IPP
+    IppStatus status = ippsMagnitude_32f(x, y, mag, len);
+    if (status >= 0)
+        return;
+#endif
+
    int i = 0;

 #if CV_SSE
@ -264,6 +270,12 @@ static void Magnitude_32f(const float* x, const float* y, float* mag, int len)

 static void Magnitude_64f(const double* x, const double* y, double* mag, int len)
 {
+#ifdef HAVE_IPP
+    IppStatus status = ippsMagnitude_64f(x, y, mag, len);
+    if (status >= 0)
+        return;
+#endif
+
    int i = 0;

 #if CV_SSE2
@ -291,6 +303,11 @@ static void Magnitude_64f(const double* x, const double* y, double* mag, int len

 static void InvSqrt_32f(const float* src, float* dst, int len)
 {
+#ifdef HAVE_IPP
+    if (ippsInvSqrt_32f_A21(src, dst, len) >= 0)
+        return;
+#endif
+
    int i = 0;

 #if CV_SSE
@ -334,6 +351,10 @@ static void InvSqrt_64f(const double* src, double* dst, int len)

 static void Sqrt_32f(const float* src, float* dst, int len)
 {
+#ifdef HAVE_IPP
+    if (ippsSqrt_32f_A21(src, dst, len) >= 0)
+        return;
+#endif
    int i = 0;

 #if CV_SSE
@ -363,6 +384,11 @@ static void Sqrt_32f(const float* src, float* dst, int len)

 static void Sqrt_64f(const double* src, double* dst, int len)
 {
+#ifdef HAVE_IPP
+    if (ippsSqrt_64f_A50(src, dst, len) >= 0)
+        return;
+#endif
+
    int i = 0;

 #if CV_SSE2
@ -729,6 +755,22 @@ void polarToCart( InputArray src1, InputArray src2,
    dst2.create( Angle.dims, Angle.size, type );
    Mat X = dst1.getMat(), Y = dst2.getMat();

+#ifdef HAVE_IPP
+    if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
+    {
+        typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
+                                                         void * pDstRe, void * pDstIm, int len);
+        ippsPolarToCart ippFunc =
+        depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f :
+        depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0;
+        CV_Assert(ippFunc != 0);
+
+        IppStatus status = ippFunc(Mag.data, Angle.data, X.data, Y.data, static_cast<int>(cn * X.total()));
+        if (status >= 0)
+            return;
+    }
+#endif
+
    const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
    uchar* ptrs[4];
    NAryMatIterator it(arrays, ptrs);
@ -2119,6 +2161,29 @@ void pow( InputArray _src, double power, OutputArray _dst )
            _src.copyTo(_dst);
            return;
        case 2:
+#ifdef HAVE_IPP
+            if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) || (_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) ))
+            {
+                Mat src = _src.getMat();
+                _dst.create( src.dims, src.size, type );
+                Mat dst = _dst.getMat();
+
+                Size size = src.size();
+                int srcstep = (int)src.step, dststep = (int)dst.step, esz = CV_ELEM_SIZE(type);
+                if (src.isContinuous() && dst.isContinuous())
+                {
+                    size.width = (int)src.total();
+                    size.height = 1;
+                    srcstep = dststep = (int)src.total() * esz;
+                }
+                size.width *= cn;
+
+                IppStatus status = ippiSqr_32f_C1R((const Ipp32f *)src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+                if (status >= 0)
+                    return;
+            }
+#endif
            if (same)
                multiply(_dst, _dst, _dst);
            else
@ -2168,6 +2233,18 @@ void pow( InputArray _src, double power, OutputArray _dst )
    }
    else
    {
+#ifdef HAVE_IPP
+        if (src.isContinuous() && dst.isContinuous())
+        {
+            IppStatus status = depth == CV_32F ?
+                        ippsPowx_32f_A21((const Ipp32f *)src.data, (Ipp32f)power, (Ipp32f*)dst.data, (Ipp32s)(src.total() * cn)) :
+                        ippsPowx_64f_A50((const Ipp64f *)src.data, power, (Ipp64f*)dst.data, (Ipp32s)(src.total() * cn));
+
+            if (status >= 0)
+                return;
+        }
+#endif
+
        int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
        size_t esz1 = src.elemSize1();

--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@ -2212,7 +2212,7 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray
    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
    CV_Assert(src1.size == src2.size);

-    _dst.create(src1.dims, src1.size, src1.type());
+    _dst.create(src1.dims, src1.size, type);
    Mat dst = _dst.getMat();

    float falpha = (float)alpha;
@ -2220,9 +2220,16 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray

    ScaleAddFunc func = depth == CV_32F ? (ScaleAddFunc)scaleAdd_32f : (ScaleAddFunc)scaleAdd_64f;

-    if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() )
+    if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous())
    {
        size_t len = src1.total()*cn;
+#if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY
+        if (depth == CV_32F &&
+                ippmSaxpy_vava_32f((const Ipp32f *)src1.data, (int)src1.step, sizeof(Ipp32f), falpha,
+                (const Ipp32f *)src2.data, (int)src2.step, sizeof(Ipp32f),
+                (Ipp32f *)dst.data, (int)dst.step, sizeof(Ipp32f), (int)len, 1) >= 0)
+            return;
+#endif
        func(src1.data, src2.data, dst.data, (int)len, palpha);
        return;
    }
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@ -2967,6 +2967,30 @@ void cv::transpose( InputArray _src, OutputArray _dst )
        return;
    }

+#ifdef HAVE_IPP
+    // Pick the IPP transpose primitive matching the array type; 0 means "not supported by this path".
+    typedef IppStatus (CV_STDCALL * ippiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize);
+    ippiTranspose ippFunc =
+    type == CV_8UC1 ? (ippiTranspose)ippiTranspose_8u_C1R :
+    type == CV_8UC3 ? (ippiTranspose)ippiTranspose_8u_C3R :
+    type == CV_8UC4 ? (ippiTranspose)ippiTranspose_8u_C4R :
+    type == CV_16UC1 ? (ippiTranspose)ippiTranspose_16u_C1R :
+    type == CV_16UC3 ? (ippiTranspose)ippiTranspose_16u_C3R :
+    type == CV_16UC4 ? (ippiTranspose)ippiTranspose_16u_C4R :
+    type == CV_16SC1 ? (ippiTranspose)ippiTranspose_16s_C1R :
+    type == CV_16SC3 ? (ippiTranspose)ippiTranspose_16s_C3R :
+    type == CV_16SC4 ? (ippiTranspose)ippiTranspose_16s_C4R :
+    type == CV_32SC1 ? (ippiTranspose)ippiTranspose_32s_C1R :
+    type == CV_32SC3 ? (ippiTranspose)ippiTranspose_32s_C3R :
+    type == CV_32SC4 ? (ippiTranspose)ippiTranspose_32s_C4R :
+    type == CV_32FC1 ? (ippiTranspose)ippiTranspose_32f_C1R :
+    type == CV_32FC3 ? (ippiTranspose)ippiTranspose_32f_C3R :
+    type == CV_32FC4 ? (ippiTranspose)ippiTranspose_32f_C4R : 0;
+
+    // The primitives selected above take separate source and destination images,
+    // so they are skipped when src and dst share a buffer. That case is handled
+    // by the in-place branch that follows.
+    IppiSize roiSize = { src.cols, src.rows };
+    if (ippFunc != 0 && dst.data != src.data &&
+        ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, roiSize) >= 0)
+        return;
+#endif
+
    if( dst.data == src.data )
    {
        TransposeInplaceFunc func = transposeInplaceTab[esz];
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@ -933,10 +933,10 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
            dcn_stddev = (int)stddev.total();
            pstddev = (Ipp64f *)stddev.data;
        }
-        for( int k = cn; k < dcn_mean; k++ )
-            pmean[k] = 0;
-        for( int k = cn; k < dcn_stddev; k++ )
-            pstddev[k] = 0;
+        for( int c = cn; c < dcn_mean; c++ )
+            pmean[c] = 0;
+        for( int c = cn; c < dcn_stddev; c++ )
+            pstddev[c] = 0;
        IppiSize sz = { cols, rows };
        int type = src.type();
        if( !mask.empty() )
@ -2016,6 +2016,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    size_t total_size = src.total();
    int rows = src.size[0], cols = (int)(total_size/rows);
+
    if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
        && cols > 0 && (size_t)rows*cols == total_size
        && (normType == NORM_INF || normType == NORM_L1 ||
--- a/modules/imgproc/src/accum.cpp
+++ b/modules/imgproc/src/accum.cpp
@ -457,6 +457,56 @@ void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _m

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

+#ifdef HAVE_IPP
+    // IPP fast path: usable for arrays of up to 2 dimensions, or for N-D arrays
+    // when every buffer involved is continuous (an empty mask does not count against that).
+    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddSquare)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
+        typedef IppStatus (CV_STDCALL * ippiAddSquareMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
+                                                           int srcDstStep, IppiSize roiSize);
+        ippiAddSquare ippFunc = 0;
+        ippiAddSquareMask ippFuncMask = 0;
+
+        if (mask.empty())
+        {
+            // Unmasked: any channel count is treated as one wide single-channel row (width is scaled by scn below).
+            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_8u32f_C1IR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_16u32f_C1IR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_32f_C1IR : 0;
+        }
+        else if (scn == 1)
+        {
+            // Masked: only single-channel sources are routed to IPP.
+            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_8u32f_C1IMR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_16u32f_C1IMR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_32f_C1IMR : 0;
+        }
+
+        if (ippFunc || ippFuncMask)
+        {
+            IppStatus status = ippStsNoErr;
+
+            Size size = src.size();
+            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
+            // Flatten continuous data into a single row. The mask test mirrors the
+            // guard above so that the N-D, no-mask case admitted there is also
+            // flattened here instead of being processed with 2-D size and steps.
+            if (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous()))
+            {
+                srcstep = static_cast<int>(src.total() * src.elemSize());
+                dststep = static_cast<int>(dst.total() * dst.elemSize());
+                maskstep = static_cast<int>(mask.total() * mask.elemSize());
+                size.width = static_cast<int>(src.total());
+                size.height = 1;
+            }
+            size.width *= scn;
+
+            if (mask.empty())
+                status = ippFunc(src.data, srcstep, (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+            else
+                status = ippFuncMask(src.data, srcstep, (Ipp8u *)mask.data, maskstep,
+                                     (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+            // A negative status means IPP failed; fall through to the generic code.
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
    int fidx = getAccTabIdx(sdepth, ddepth);
    AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0;
    CV_Assert( func != 0 );
@ -485,6 +535,59 @@ void cv::accumulateProduct( InputArray _src1, InputArray _src2,

    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

+#ifdef HAVE_IPP
+    // Optional IPP fast path. Any type combination that is not dispatched below, or a
+    // negative IPP status, leaves this block and continues with the generic code after it.
+    //
+    // The arrays can be handed to IPP as one flat row only when all of them (mask
+    // included) are continuous. The same predicate is used for the entry check and for
+    // the flattening below, so an N-d input is never described to IPP by the 2-D
+    // size/step values taken from src1.size() and step.
+    const bool ippCanFlatten = src1.isContinuous() && src2.isContinuous() &&
+                               dst.isContinuous() && mask.isContinuous();
+    if (src1.dims <= 2 || ippCanFlatten)
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddProduct)(const void * pSrc1, int src1Step, const void * pSrc2,
+                                                        int src2Step, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
+        typedef IppStatus (CV_STDCALL * ippiAddProductMask)(const void * pSrc1, int src1Step, const void * pSrc2, int src2Step,
+                                                            const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
+        ippiAddProduct ippFunc = 0;
+        ippiAddProductMask ippFuncMask = 0;
+
+        // Only 8u, 16u and 32f sources with a 32f destination are dispatched to IPP.
+        if (mask.empty())
+        {
+            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_8u32f_C1IR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_16u32f_C1IR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_32f_C1IR : 0;
+        }
+        else if (scn == 1) // masked flavors are selected for single-channel input only
+        {
+            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_8u32f_C1IMR :
+                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_16u32f_C1IMR :
+                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_32f_C1IMR : 0;
+        }
+
+        if (ippFunc || ippFuncMask)
+        {
+            IppStatus status = ippStsNoErr;
+
+            Size size = src1.size();
+            int src1step = (int)src1.step, src2step = (int)src2.step, dststep = (int)dst.step, maskstep = (int)mask.step;
+            if (ippCanFlatten)
+            {
+                // Describe each buffer as a single row holding total() elements.
+                src1step = static_cast<int>(src1.total() * src1.elemSize());
+                src2step = static_cast<int>(src2.total() * src2.elemSize());
+                dststep = static_cast<int>(dst.total() * dst.elemSize());
+                maskstep = static_cast<int>(mask.total() * mask.elemSize());
+                size.width = static_cast<int>(src1.total());
+                size.height = 1;
+            }
+            // The C1 flavors are used for every channel count: widen the row by scn.
+            size.width *= scn;
+
+            if (mask.empty())
+                status = ippFunc(src1.data, src1step, src2.data, src2step, (Ipp32f *)dst.data,
+                                 dststep, ippiSize(size.width, size.height));
+            else
+                status = ippFuncMask(src1.data, src1step, src2.data, src2step, (Ipp8u *)mask.data, maskstep,
+                                     (Ipp32f *)dst.data, dststep, ippiSize(size.width, size.height));
+
+            // A negative status is treated as failure; fall through to the generic path.
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
    int fidx = getAccTabIdx(sdepth, ddepth);
    AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0;
    CV_Assert( func != 0 );
@ -512,6 +615,58 @@ void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

+#ifdef HAVE_IPP
+    // Optional IPP fast path. When no IPP flavor matches, or IPP returns a negative
+    // status, execution continues with the generic code after this block.
+    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && mask.isContinuous()))
+    {
+        typedef IppStatus (CV_STDCALL * ippiAddWeighted)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep,
+                                                         IppiSize roiSize, Ipp32f alpha);
+        typedef IppStatus (CV_STDCALL * ippiAddWeightedMask)(const void * pSrc, int srcStep, const Ipp8u * pMask,
+                                                             int maskStep, Ipp32f * pSrcDst,
+                                                             int srcDstStep, IppiSize roiSize, Ipp32f alpha);
+        const bool maskGiven = !mask.empty();
+        ippiAddWeighted plainFn = 0;
+        ippiAddWeightedMask maskedFn = 0;
+
+        // Every dispatched flavor writes a 32f destination; the masked ones are
+        // additionally restricted to single-channel input.
+        if (ddepth == CV_32F)
+        {
+            if (!maskGiven)
+            {
+                switch (sdepth)
+                {
+                case CV_8U:  plainFn = (ippiAddWeighted)ippiAddWeighted_8u32f_C1IR;  break;
+                case CV_16U: plainFn = (ippiAddWeighted)ippiAddWeighted_16u32f_C1IR; break;
+                case CV_32F: plainFn = (ippiAddWeighted)ippiAddWeighted_32f_C1IR;    break;
+                default: break;
+                }
+            }
+            else if (scn == 1)
+            {
+                switch (sdepth)
+                {
+                case CV_8U:  maskedFn = (ippiAddWeightedMask)ippiAddWeighted_8u32f_C1IMR;  break;
+                case CV_16U: maskedFn = (ippiAddWeightedMask)ippiAddWeighted_16u32f_C1IMR; break;
+                case CV_32F: maskedFn = (ippiAddWeightedMask)ippiAddWeighted_32f_C1IMR;    break;
+                default: break;
+                }
+            }
+        }
+
+        if (plainFn || maskedFn)
+        {
+            Size extent = src.size();
+            int srcStride = (int)src.step;
+            int dstStride = (int)dst.step;
+            int maskStride = (int)mask.step;
+
+            const bool flat = src.isContinuous() && dst.isContinuous() && mask.isContinuous();
+            if (flat)
+            {
+                // Describe each buffer as a single row holding total() elements.
+                srcStride = static_cast<int>(src.total() * src.elemSize());
+                dstStride = static_cast<int>(dst.total() * dst.elemSize());
+                maskStride = static_cast<int>(mask.total() * mask.elemSize());
+                extent.width = static_cast<int>(src.total());
+                extent.height = 1;
+            }
+            // The C1 flavors are used for every channel count: widen the row by scn.
+            extent.width *= scn;
+
+            const IppiSize roi = ippiSize(extent.width, extent.height);
+            const Ipp32f weight = (Ipp32f)alpha;
+
+            IppStatus status = ippStsNoErr;
+            if (maskGiven)
+                status = maskedFn(src.data, srcStride, (Ipp8u *)mask.data, maskStride,
+                                  (Ipp32f *)dst.data, dstStride, roi, weight);
+            else
+                status = plainFn(src.data, srcStride, (Ipp32f *)dst.data, dstStride, roi, weight);
+
+            // A negative status is treated as failure; fall through to the generic path.
+            if (status >= 0)
+                return;
+        }
+    }
+#endif
+
    int fidx = getAccTabIdx(sdepth, ddepth);
    AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0;
    CV_Assert( func != 0 );
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@ -298,7 +298,7 @@ static ippiReorderFunc ippiSwapChannelsC3RTab[] =
    0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
 };

-#if (IPP_VERSION_X100 >= 801)
+#if IPP_VERSION_X100 >= 801
 static ippiReorderFunc ippiSwapChannelsC4RTab[] =
 {
    (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
@ -3315,7 +3315,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
            CV_Assert( scn == 3 || scn == 4 );
            _dst.create(sz, CV_MAKETYPE(depth, 1));
            dst = _dst.getMat();
-/**/
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 /*
            if( code == CV_BGR2GRAY )
@ -3341,7 +3341,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                    return;
            }
 #endif
-/**/
+
            bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;

            if( depth == CV_8U )
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@ -61,9 +61,9 @@ namespace cv
    typedef IppStatus (CV_STDCALL* ippiResizeGetSrcOffset)(void*, IppiPoint, IppiPoint*);
 #endif

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) && 0
    typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
-    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
+    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
    typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);

    template <int channels, typename Type>
@ -75,7 +75,7 @@ namespace cv
        return func(values, dataPointer, step, size) >= 0;
    }

-    bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
+    static bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
    {
        if( channels == 1 )
        {
@ -3892,11 +3892,11 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
 namespace cv
 {

-class warpAffineInvoker :
+class WarpAffineInvoker :
    public ParallelLoopBody
 {
 public:
-    warpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
+    WarpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
                      const Scalar &_borderValue, int *_adelta, int *_bdelta, double *_M) :
        ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
        borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
@ -4013,16 +4013,20 @@ private:
    double *M;
 };

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-class IPPwarpAffineInvoker :
+
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+class IPPWarpAffineInvoker :
    public ParallelLoopBody
 {
 public:
-    IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
-      ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
-      {
-          *ok = true;
-      }
+    IPPWarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int _borderType,
+                         const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
+        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
+        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+    {
+        *ok = true;
+    }

    virtual void operator() (const Range& range) const
    {
@ -4040,21 +4044,26 @@ public:
                return;
            }
        }
-        if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+
+        // Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+        IppStatus status = func( src.data, srcsize, (int)src.step[0], srcroi, dst.data,
+                                (int)dst.step[0], dstroi, coeffs, mode );
+        if( status < 0)
            *ok = false;
    }
 private:
    Mat &src;
    Mat &dst;
-    double (&coeffs)[2][3];
    int mode;
+    double (&coeffs)[2][3];
    int borderType;
    Scalar borderValue;
    ippiWarpAffineBackFunc func;
    bool *ok;
-    const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
+    const IPPWarpAffineInvoker& operator= (const IPPWarpAffineInvoker&);
 };
 #endif
+    */

 #ifdef HAVE_OPENCL

@ -4204,16 +4213,19 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
    int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
    const int AB_BITS = MAX(10, (int)INTER_BITS);
    const int AB_SCALE = 1 << AB_BITS;
-/*
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    int depth = src.depth();
-    int channels = src.channels();
+
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+    int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
-        ( channels == 1 || channels == 3 || channels == 4 ) &&
-        ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
+       ( cn == 1 || cn == 3 || cn == 4 ) &&
+       ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC) &&
+       ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT) )
    {
-        int type = src.type();
-        ippiWarpAffineBackFunc ippFunc =
+        ippiWarpAffineBackFunc ippFunc = 0;
+        if ((flags & WARP_INVERSE_MAP) != 0)
+        {
+            ippFunc =
            type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
            type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
            type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
@ -4224,31 +4236,43 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
            type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
            type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
            0;
-        int mode =
-            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
-            flags == INTER_NEAREST ? IPPI_INTER_NN :
-            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
-            0;
-        if( mode && ippFunc )
-        {
-            double coeffs[2][3];
-            for( int i = 0; i < 2; i++ )
-            {
-                for( int j = 0; j < 3; j++ )
-                {
-                    coeffs[i][j] = matM.at<double>(i, j);
-                }
-            }
-            bool ok;
-            Range range(0, dst.rows);
-            IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
-            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
-            if( ok )
-                return;
        }
+        else
+        {
+            ippFunc =
+            type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C4R :
+            0;
+        }
+        int mode =
+        interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
+        interpolation == INTER_NEAREST ? IPPI_INTER_NN :
+        interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC :
+        0;
+        CV_Assert(mode && ippFunc);
+
+        double coeffs[2][3];
+        for( int i = 0; i < 2; i++ )
+            for( int j = 0; j < 3; j++ )
+                coeffs[i][j] = matM.at<double>(i, j);
+
+        bool ok;
+        Range range(0, dst.rows);
+        IPPWarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
    }
 #endif
-*/
+     */
+
    for( x = 0; x < dst.cols; x++ )
    {
        adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@ -4256,7 +4280,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
    }

    Range range(0, dst.rows);
-    warpAffineInvoker invoker(src, dst, interpolation, borderType,
+    WarpAffineInvoker invoker(src, dst, interpolation, borderType,
                              borderValue, adelta, bdelta, M);
    parallel_for_(range, invoker, dst.total()/(double)(1<<16));
 }
@ -4265,12 +4289,12 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
 namespace cv
 {

-class warpPerspectiveInvoker :
+class WarpPerspectiveInvoker :
    public ParallelLoopBody
 {
 public:

-    warpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
+    WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
                           int _borderType, const Scalar &_borderValue) :
        ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
        borderType(_borderType), borderValue(_borderValue)
@ -4356,16 +4380,19 @@ private:
    Scalar borderValue;
 };

-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-class IPPwarpPerspectiveInvoker :
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+class IPPWarpPerspectiveInvoker :
    public ParallelLoopBody
 {
 public:
-    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
-      ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
-      {
-          *ok = true;
-      }
+    IPPWarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation,
+        int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveFunc _func, bool *_ok) :
+        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
+        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+    {
+        *ok = true;
+    }

    virtual void operator() (const Range& range) const
    {
@ -4384,22 +4411,25 @@ public:
                return;
            }
        }
-        if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
+
+        IppStatus status = func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode);
+        if (status != ippStsNoErr)
            *ok = false;
    }
 private:
    Mat &src;
    Mat &dst;
-    double (&coeffs)[3][3];
    int mode;
+    double (&coeffs)[3][3];
    int borderType;
    const Scalar borderValue;
-    ippiWarpPerspectiveBackFunc func;
+    ippiWarpPerspectiveFunc func;
    bool *ok;
-    const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
+
+    const IPPWarpPerspectiveInvoker& operator= (const IPPWarpPerspectiveInvoker&);
 };
 #endif
-
+    */
 }

 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
@ -4432,55 +4462,65 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
        return;
 #endif

-    if( !(flags & WARP_INVERSE_MAP) )
-         invert(matM, matM);
-/*
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    int depth = src.depth();
-    int channels = src.channels();
-    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
-        ( channels == 1 || channels == 3 || channels == 4 ) &&
-        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
+    /*
+#if defined (HAVE_IPP) && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+    int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    if( (depth == CV_8U || depth == CV_16U || depth == CV_32F) &&
+       (cn == 1 || cn == 3 || cn == 4) &&
+       ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) &&
+       (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC))
    {
-        int type = src.type();
-        ippiWarpPerspectiveBackFunc ippFunc =
-            type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
-            type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
-            type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
-            type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
-            type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
-            type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
-            type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
-            type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
-            type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
-            0;
-        int mode =
-            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
-            flags == INTER_NEAREST ? IPPI_INTER_NN :
-            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
-            0;
-        if( mode && ippFunc )
+        ippiWarpPerspectiveFunc ippFunc = 0;
+        if ((flags & WARP_INVERSE_MAP) != 0)
        {
-            double coeffs[3][3];
-            for( int i = 0; i < 3; i++ )
-            {
-                for( int j = 0; j < 3; j++ )
-                {
-                    coeffs[i][j] = matM.at<double>(i, j);
-                }
-            }
-            bool ok;
-            Range range(0, dst.rows);
-            IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
-            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
-            if( ok )
-                return;
+            ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C4R : 0;
        }
+        else
+        {
+            ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C4R : 0;
+        }
+        int mode =
+        interpolation == INTER_NEAREST ? IPPI_INTER_NN :
+        interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
+        interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : 0;
+        CV_Assert(mode && ippFunc);
+
+        double coeffs[3][3];
+        for( int i = 0; i < 3; i++ )
+            for( int j = 0; j < 3; j++ )
+                coeffs[i][j] = matM.at<double>(i, j);
+
+        bool ok;
+        Range range(0, dst.rows);
+        IPPWarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
    }
 #endif
-*/
+    */
+
+    if( !(flags & WARP_INVERSE_MAP) )
+        invert(matM, matM);
+
    Range range(0, dst.rows);
-    warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
+    WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
    parallel_for_(range, invoker, dst.total()/(double)(1<<16));
 }

--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@ -841,7 +841,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
    CV_OCL_RUN(_dst.isUMat(), ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize))

    Mat src = _src.getMat();
-    int sdepth = src.depth(), cn = src.channels();
+    int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    if( ddepth < 0 )
        ddepth = sdepth;
    _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
@ -858,6 +858,69 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
        return;
 #endif

+#ifdef HAVE_IPP
+    // Optional IPP fast path for the normalized box filter. If the preconditions do not
+    // hold, or IPP reports a negative status, execution continues with the code after
+    // this block.
+    int ippBorderType = borderType & ~BORDER_ISOLATED;
+
+    // ocvAnchor: the requested anchor, with negative components resolved to ksize / 2.
+    // ippAnchor: ksize / 2 for odd kernel sizes and ksize / 2 - 1 for even ones.
+    // The IPP call below takes no anchor argument, so it is used only when both agree.
+    Point ocvAnchor, ippAnchor;
+    ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
+    ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
+    ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
+    ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);
+
+    // src.data != dst.data: ippiFilterBoxBorder_*R takes separate source and destination
+    // pointers and is not an in-place flavor, so aliased buffers are left to the generic path.
+    if (normalize && !src.isSubmatrix() && ddepth == sdepth &&
+        (ippBorderType == BORDER_REPLICATE || ippBorderType == BORDER_CONSTANT) &&
+        ocvAnchor == ippAnchor && src.data != dst.data )
+    {
+        Ipp32s bufSize;
+        IppiSize roiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize.width, ksize.height);
+        // The condition above leaves only two border modes, so the mapping is total.
+        const IppiBorderType ippBorder = ippBorderType == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl;
+
+        // Queries the work-buffer size, runs the filter for one type/channel flavor and
+        // returns from boxFilter on success; on failure nothing is done.
+#define IPP_FILTER_BOX_BORDER(ippType, ippDataType, flavor) \
+        do \
+        { \
+            if (ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippDataType, cn, &bufSize) >= 0) \
+            { \
+                Ipp8u * buffer = ippsMalloc_8u(bufSize); \
+                ippType borderValue[4] = { 0, 0, 0, 0 }; \
+                IppStatus status = ippiFilterBoxBorder_##flavor((ippType *)src.data, (int)src.step, (ippType *)dst.data, (int)dst.step, roiSize, maskSize, \
+                                                                ippBorder, borderValue, buffer); \
+                ippsFree(buffer); \
+                if (status >= 0) \
+                    return; \
+            } \
+        } while ((void)0, 0)
+
+        if (stype == CV_8UC1)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C1R);
+        else if (stype == CV_8UC3)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C3R);
+        else if (stype == CV_8UC4)
+            IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C4R);
+
+        else if (stype == CV_16UC1)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C1R);
+        else if (stype == CV_16UC3)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C3R);
+        else if (stype == CV_16UC4)
+            IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C4R);
+
+        else if (stype == CV_16SC1)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C1R);
+        else if (stype == CV_16SC3)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C3R);
+        else if (stype == CV_16SC4)
+            IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C4R);
+
+        else if (stype == CV_32FC1)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C1R);
+        else if (stype == CV_32FC3)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C3R);
+        else if (stype == CV_32FC4)
+            IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C4R);
+    }
+#undef IPP_FILTER_BOX_BORDER
+#endif
+
    Ptr<FilterEngine> f = createBoxFilter( src.type(), dst.type(),
                        ksize, anchor, normalize, borderType );
    f->apply( src, dst );
@ -1948,13 +2011,46 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
        return;
    }

-    CV_OCL_RUN(_src0.dims() <= 2 && _dst.isUMat(),
+    CV_OCL_RUN(_dst.isUMat(),
               ocl_medianFilter(_src0,_dst, ksize))

    Mat src0 = _src0.getMat();
    _dst.create( src0.size(), src0.type() );
    Mat dst = _dst.getMat();

+#if defined HAVE_IPP && IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR >= 801
+    // Optional IPP fast path for single-channel 8u/16u/16s/32f images. If no flavor
+    // matches, or IPP reports a negative status, execution continues after this block.
+    // The version test compares the combined major/minor number, so a later major
+    // release with a minor version of 0 is still accepted.
+
+    // Queries the work-buffer size, runs the median filter for one flavor with a
+    // replicated border and returns from medianBlur on success.
+#define IPP_FILTER_MEDIAN_BORDER(ippType, ippDataType, flavor) \
+    do \
+    { \
+        if (ippiFilterMedianBorderGetBufferSize(dstRoiSize, maskSize, \
+            ippDataType, CV_MAT_CN(type), &bufSize) >= 0) \
+        { \
+            Ipp8u * buffer = ippsMalloc_8u(bufSize); \
+            IppStatus status = ippiFilterMedianBorder_##flavor((const ippType *)src0.data, (int)src0.step, \
+                (ippType *)dst.data, (int)dst.step, dstRoiSize, maskSize, \
+                ippBorderRepl, (ippType)0, buffer); \
+            ippsFree(buffer); \
+            if (status >= 0) \
+                return; \
+        } \
+    } \
+    while ((void)0, 0)
+
+    // The IPP call reads src0 while it writes dst, so it is skipped when both share
+    // one buffer (in-place call); the code after this block handles that case.
+    if (src0.data != dst.data)
+    {
+        Ipp32s bufSize;
+        IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize);
+
+        int type = src0.type();
+        if (type == CV_8UC1)
+            IPP_FILTER_MEDIAN_BORDER(Ipp8u, ipp8u, 8u_C1R);
+        else if (type == CV_16UC1)
+            IPP_FILTER_MEDIAN_BORDER(Ipp16u, ipp16u, 16u_C1R);
+        else if (type == CV_16SC1)
+            IPP_FILTER_MEDIAN_BORDER(Ipp16s, ipp16s, 16s_C1R);
+        else if (type == CV_32FC1)
+            IPP_FILTER_MEDIAN_BORDER(Ipp32f, ipp32f, 32f_C1R);
+    }
+#undef IPP_FILTER_MEDIAN_BORDER
+#endif
+
 #ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::medianBlur(src0, dst, ksize))
        return;
--- a/modules/video/src/motempl.cpp
+++ b/modules/video/src/motempl.cpp
@ -80,13 +80,27 @@ void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi,

    Mat silh = _silhouette.getMat(), mhi = _mhi.getMat();
    Size size = silh.size();
+#ifdef HAVE_IPP
+    int silhstep = (int)silh.step, mhistep = (int)mhi.step;
+#endif

    if( silh.isContinuous() && mhi.isContinuous() )
    {
        size.width *= size.height;
        size.height = 1;
+#ifdef HAVE_IPP
+        // Both images were just flattened to one row, so each step becomes the byte
+        // length of the whole buffer. silh is handed to IPP as Ipp8u data and mhi as
+        // Ipp32f data. The cast covers the whole product to avoid an implicit
+        // size_t -> int narrowing.
+        silhstep = (int)silh.total();
+        mhistep = (int)(mhi.total() * sizeof(Ipp32f));
+#endif
    }

+#ifdef HAVE_IPP
+    IppStatus status = ippiUpdateMotionHistory_8u32f_C1IR((const Ipp8u *)silh.data, silhstep, (Ipp32f *)mhi.data, mhistep,
+                                                          ippiSize(size.width, size.height), (Ipp32f)timestamp, (Ipp32f)duration);
+    if (status >= 0)
+        return;
+#endif
+
 #if CV_SSE2
    volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2);
 #endif