fix gpu module compilation under linux

Vladislav Vinogradov
2010-11-08 09:55:10 +00:00
parent 4cdcf37139
commit 863d61e9eb
4 changed files with 425 additions and 252 deletions


@@ -84,162 +84,230 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool,
#else /* !defined (HAVE_CUDA) */
#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR)
#if (defined(_WIN32) || defined(_WIN64)) && (NPP_VERSION >= 32)
# define NPP_HAVE_COMPLEX_TYPE
#endif
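// NPP_VERSION folds major.minor into a single integer, so NPP 3.2 yields 32;
// the complex-valued (Npp32fc) NPP functions are only assumed available on Windows builds against NPP >= 3.2.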
////////////////////////////////////////////////////////////////////////
// add subtract multiply divide
namespace
{
typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
NppiSize oSizeROI, int nScaleFactor);
typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst,
int nDstStep, NppiSize oSizeROI);
typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst,
int nDstStep, NppiSize oSizeROI);
void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
#if NPP_VERSION >= 32
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
#else
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
#endif
dst.create( src1.size(), src1.type() );
NppiSize sz;
sz.width = src1.cols;
sz.height = src1.rows;
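// The 8-bit NPP functions take an extra scale factor; passing 0 leaves the result unscaled.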
switch (src1.type())
{
case CV_8UC1:
nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz, 0) );
break;
case CV_8UC4:
nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz, 0) );
break;
#if NPP_VERSION >= 32
case CV_32SC1:
nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step,
src2.ptr<Npp32s>(), src2.step,
dst.ptr<Npp32s>(), dst.step, sz) );
break;
#endif
case CV_32FC1:
nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step,
src2.ptr<Npp32f>(), src2.step,
dst.ptr<Npp32f>(), dst.step, sz) );
break;
default:
CV_Assert(!"Unsupported source type");
}
}
template<int SCN> struct NppArithmScalarFunc;
template<> struct NppArithmScalarFunc<1>
{
typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst,
int nDstStep, NppiSize oSizeROI);
};
#ifdef NPP_HAVE_COMPLEX_TYPE
template<> struct NppArithmScalarFunc<2>
{
typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst,
int nDstStep, NppiSize oSizeROI);
};
#endif
template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
{
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
dst.create(src.size(), src.type());
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
}
};
#ifdef NPP_HAVE_COMPLEX_TYPE
template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
{
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
dst.create(src.size(), src.type());
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Npp32fc nValue;
nValue.re = (Npp32f)sc[0];
nValue.im = (Npp32f)sc[1];
nppSafeCall( func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz) );
}
};
#endif
}
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
#else
nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, 0, nppiAdd_32f_C1R);
#endif
}
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
#else
nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, 0, nppiSub_32f_C1R);
#endif
}
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
#else
nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, 0, nppiMul_32f_C1R);
#endif
}
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
#else
nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, 0, nppiDiv_32f_C1R);
#endif
}
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
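// Indexed by src.channels() (1 or 2), so slot 0 is an unused placeholder.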
static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc};
CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
NppArithmScalar<1, nppiAddC_32f_C1R>::calc(src, sc, dst);
# else
CV_Assert(!"This function doesn't supported");
# endif
#endif
}
void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc};
CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
NppArithmScalar<1, nppiSubC_32f_C1R>::calc(src, sc, dst);
# else
CV_Assert(!"This function doesn't supported");
# endif
#endif
}
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};
CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
NppArithmScalar<1, nppiMulC_32f_C1R>::calc(src, sc, dst);
# else
CV_Assert(!"This function doesn't supported");
# endif
#endif
}
void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};
CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
NppArithmScalar<1, nppiDivC_32f_C1R>::calc(src, sc, dst);
# else
CV_Assert(!"This function doesn't supported");
# endif
#endif
}
////////////////////////////////////////////////////////////////////////
@@ -263,9 +331,13 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
#if NPP_VERSION >= 32
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
#else
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
#endif
dst.create( src1.size(), src1.type() );
@@ -276,20 +348,22 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
switch (src1.type())
{
case CV_8UC1:
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz) );
break;
case CV_8UC4:
nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz) );
break;
#if NPP_VERSION >= 32
case CV_32SC1:
nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step,
src2.ptr<Npp32s>(), src2.step,
dst.ptr<Npp32s>(), dst.step, sz) );
break;
#endif
case CV_32FC1:
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step,
src2.ptr<Npp32f>(), src2.step,
@@ -302,7 +376,8 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
{
#if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
dst.create( src.size(), src.type() );
@@ -311,6 +386,9 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
sz.height = src.rows;
nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );
#else
CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
@@ -322,7 +400,7 @@ namespace cv { namespace gpu { namespace mathfunc
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
}}}
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
@@ -340,8 +418,8 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
{
if (cmpop != CMP_NE)
{
nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
}
else
@@ -367,7 +445,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
////////////////////////////////////////////////////////////////////////
// meanStdDev
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
{
CV_Assert(src.type() == CV_8UC1);
@@ -381,7 +459,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
////////////////////////////////////////////////////////////////////////
// norm
double cv::gpu::norm(const GpuMat& src1, int normType)
{
return norm(src1, GpuMat(src1.size(), src1.type(), Scalar::all(0.0)), normType);
}
@@ -393,7 +471,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
CV_Assert(src1.type() == CV_8UC1);
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
NppiSize oSizeROI, Npp64f* pRetVal);
static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
@@ -405,8 +483,8 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
int funcIdx = normType >> 1;
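// NORM_INF (1), NORM_L1 (2) and NORM_L2 (4) shift right to indices 0, 1 and 2 of the table above.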
double retVal;
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
sz, &retVal) );
return retVal;
@@ -427,14 +505,14 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
if (src.type() == CV_8UC1)
{
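// OpenCV flipCode: 0 flips around the x-axis, positive values around the y-axis, negative around both; mapped onto the NPP mirror axes below.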
nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), src.step,
dst.ptr<Npp8u>(), dst.step, sz,
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
}
else
{
nppSafeCall( nppiMirror_8u_C4R(src.ptr<Npp8u>(), src.step,
dst.ptr<Npp8u>(), dst.step, sz,
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
}
}
@@ -444,33 +522,40 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
Scalar cv::gpu::sum(const GpuMat& src)
{
CV_Assert(!"disabled until fix crash");
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Scalar res;
#if NPP_VERSION >= 32
CV_Assert(!"disabled until fix crash");
int bufsz;
if (src.type() == CV_8UC1)
{
nppiReductionGetBufferHostSize_8u_C1R(sz, &bufsz);
GpuMat buf(1, bufsz, CV_32S);
Scalar res;
nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
return res;
}
else
{
nppiReductionGetBufferHostSize_8u_C4R(sz, &bufsz);
GpuMat buf(1, bufsz, CV_32S);
Scalar res;
nppSafeCall( nppiSum_8u_C4R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
return res;
}
#else
if (src.type() == CV_8UC1)
nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, res.val) );
else
nppSafeCall( nppiSum_8u_C4R(src.ptr<Npp8u>(), src.step, sz, res.val) );
#endif
return res;
}
////////////////////////////////////////////////////////////////////////
@@ -501,22 +586,30 @@ namespace
sz.width = src.cols;
sz.height = src.rows;
Npp8u* cuMem;
#if NPP_VERSION >= 32
cuMem = nppsMalloc_8u(8);
#else
cudaSafeCall( cudaMalloc((void**)&cuMem, 8 * sizeof(Npp8u)) );
#endif
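// cuMem holds the four per-channel minima followed by the four per-channel maxima.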
nppSafeCall( nppiMinMax_8u_C4R(src.ptr<Npp8u>(), src.step, sz, cuMem, cuMem + 4) );
if (minVal)
cudaMemcpy(minVal, cuMem, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost);
if (maxVal)
cudaMemcpy(maxVal, cuMem + 4, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost);
#if NPP_VERSION >= 32
nppsFree(cuMem);
#else
cudaSafeCall( cudaFree(cuMem) );
#endif
}
}
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
{
typedef void (*minMax_t)(const GpuMat& src, double* minVal, double* maxVal);
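// Dispatch table indexed by src.channels(); only 1- and 4-channel 8-bit inputs are handled.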
static const minMax_t minMax_callers[] = {0, minMax_c1, 0, 0, minMax_c4};
@@ -559,13 +652,13 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
NppiSize sz;
sz.height = src.rows;
sz.width = src.cols;
Mat nppLut;
lut.convertTo(nppLut, CV_32S);
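// nppiLUT_Linear expects 32-bit integer lookup values, so the 8-bit LUT is widened once up front.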
if (src.type() == CV_8UC1)
{
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
}
else
@@ -578,10 +671,10 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
{
cv::split(nppLut, nppLut3);
pValues3[0] = nppLut3[0].ptr<Npp32s>();
pValues3[1] = nppLut3[1].ptr<Npp32s>();
pValues3[2] = nppLut3[2].ptr<Npp32s>();
}
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
pValues3, lvls.pLevels3, lvls.nValues3) );
}
}
@@ -591,6 +684,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
{
#if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
@@ -600,6 +694,9 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
sz.height = src.rows;
nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
#else
CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
@@ -607,6 +704,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
void cv::gpu::log(const GpuMat& src, GpuMat& dst)
{
#if NPP_VERSION >= 32
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
@@ -616,11 +714,15 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst)
sz.height = src.rows;
nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
#else
CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
// NPP magnitude
#ifdef NPP_HAVE_COMPLEX_TYPE
namespace
{
typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
@@ -638,21 +740,30 @@ namespace
nppSafeCall( func(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
}
}
#endif
void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
::npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R);
#else
CV_Assert(!"This function doesn't supported");
#endif
}
void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
::npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R);
#else
CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
// Polar <-> Cart
namespace cv { namespace gpu { namespace mathfunc
{
void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream);
void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
@@ -721,7 +832,7 @@ void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleI
}
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, const Stream& stream)
{
::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
}