Added implementations and tests for the GPU versions of subtract, multiply, divide, transpose, absdiff, threshold, compare, meanStdDev, and norm, based on NPP.

2010-09-13 14:30:09 +00:00
parent 88a7a8f567
commit 37d39bd9de
6 changed files with 706 additions and 194 deletions
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -49,44 +49,211 @@ using namespace std;
 #if !defined (HAVE_CUDA)

 void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
+// Every entry point in this !HAVE_CUDA branch is a stub whose only action is to call throw_nogpu().
+void cv::gpu::transpose(const GpuMat& src1, GpuMat& dst) { throw_nogpu(); }
+
+void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
+// The double-returning stubs also `return 0.0` after throw_nogpu(); presumably never reached and only there to satisfy the compiler - confirm.
+double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int thresholdType) { throw_nogpu(); return 0.0; }
+
+void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop) { throw_nogpu(); }
+
+void cv::gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev) { throw_nogpu(); }
+
+double cv::gpu::norm(const GpuMat& src1, int normType) { throw_nogpu(); return 0.0; }
+double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) { throw_nogpu(); return 0.0; }

 #else /* !defined (HAVE_CUDA) */

+namespace
+{
+    // Function-pointer types for the NPP binary primitives that nppFuncCaller dispatches to (8-bit scaled and 32-bit float).
+    typedef NppStatus (*npp_binary_func_8u_scale_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step,
+                                                    Npp8u* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
+    typedef NppStatus (*npp_binary_func_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step,
+                                               Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
+    // Allocates dst with src1's size and type, then runs the primitive matching src1's type (CV_8UC1, CV_8UC4 or CV_32FC1).
+    void nppFuncCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
+                       npp_binary_func_8u_scale_t npp_func_8uc1, npp_binary_func_8u_scale_t npp_func_8uc4, npp_binary_func_32f_t npp_func_32fc1)
+    {
+        // Checked in release builds too: the ROI below is taken from src1 but applied to src2 as well, so a mismatch must not reach NPP.
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
+        dst.create( src1.size(), src1.type() );
+
+        NppiSize sz;
+        sz.width  = src1.cols;
+        sz.height = src1.rows;
+
+        if (src1.depth() == CV_8U)
+        {
+            if (src1.channels() == 1)
+            {
+                npp_func_8uc1((const Npp8u*)src1.ptr<char>(), src1.step,
+                    (const Npp8u*)src2.ptr<char>(), src2.step,
+                    (Npp8u*)dst.ptr<char>(), dst.step, sz, 0); // last argument is nScaleFactor
+            }
+            else // CV_8UC4, the only other 8-bit type admitted by the assert above
+            {
+                npp_func_8uc4((const Npp8u*)src1.ptr<char>(), src1.step,
+                    (const Npp8u*)src2.ptr<char>(), src2.step,
+                    (Npp8u*)dst.ptr<char>(), dst.step, sz, 0); // last argument is nScaleFactor
+            }
+        }
+        else //if (src1.depth() == CV_32F)
+        {
+            npp_func_32fc1((const Npp32f*)src1.ptr<float>(), src1.step,
+                (const Npp32f*)src2.ptr<float>(), src2.step,
+                (Npp32f*)dst.ptr<float>(), dst.step, sz); // NOTE: the returned NppStatus is not inspected
+        }
+    }
+}
+
 void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
+	nppFuncCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32f_C1R); // input checks, dst allocation and type dispatch all happen inside nppFuncCaller
+}
+
+void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) 
+{
+	nppFuncCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32f_C1R); // operands go in as (src2, src1); NOTE(review): presumably because nppiSub computes pSrc2 - pSrc1 - confirm against the NPP reference
+}
+
+void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+	nppFuncCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32f_C1R); // input checks, dst allocation and type dispatch all happen inside nppFuncCaller
+}
+
+void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+	nppFuncCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32f_C1R); // operands go in as (src2, src1); NOTE(review): presumably because nppiDiv computes pSrc2 / pSrc1 - confirm against the NPP reference
+}
+
+void cv::gpu::transpose(const GpuMat& src, GpuMat& dst) // transposes src into dst via nppiTranspose_8u_C1R
+{
+    CV_Assert(src.type() == CV_8UC1);
+    // Only CV_8UC1 input is accepted; dst is created with src's two dimensions swapped.
+    dst.create( src.cols, src.rows, src.type() );
+
+    NppiSize sz;
+    sz.width  = src.cols;
+    sz.height = src.rows;
+    // The ROI handed to NPP is the source size; the returned NppStatus is not inspected.
+    nppiTranspose_8u_C1R((const Npp8u*)src.ptr<char>(), src.step, (Npp8u*)dst.ptr<char>(), dst.step, sz);
+}
+
+void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) // runs nppiAbsDiff_8u_C1R / nppiAbsDiff_32f_C1R on src1 and src2, writing into dst
+{
+    // Size/type agreement is enforced in release builds too: the ROI comes from src1 but is applied to src2 as well.
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+    // Only single-channel 8-bit and 32-bit float inputs are dispatched below.
+    CV_Assert((src1.depth() == CV_8U || src1.depth() == CV_32F) && src1.channels() == 1);
    dst.create( src1.size(), src1.type() );

-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-
-    int nChannels = src1.channels();
-    CV_DbgAssert((src1.depth() == CV_8U  && nChannels == 1 || nChannels == 4) || 
-                 (src1.depth() == CV_32F && nChannels == 1));
-
    NppiSize sz;
    sz.width  = src1.cols;
    sz.height = src1.rows;

    if (src1.depth() == CV_8U)
    {
-        if (nChannels == 1)
-        {
-            nppiAdd_8u_C1RSfs((const Npp8u*)src1.ptr<char>(), src1.step, 
-                              (const Npp8u*)src2.ptr<char>(), src2.step, 
-                              (Npp8u*)dst.ptr<char>(), dst.step, sz, 0);
-        }
-        else
-        {
-            nppiAdd_8u_C4RSfs((const Npp8u*)src1.ptr<char>(), src1.step, 
-                              (const Npp8u*)src2.ptr<char>(), src2.step, 
-                              (Npp8u*)dst.ptr<char>(), dst.step, sz, 0);
-        }        
+        nppiAbsDiff_8u_C1R((const Npp8u*)src1.ptr<char>(), src1.step, 
+                (const Npp8u*)src2.ptr<char>(), src2.step, 
+                (Npp8u*)dst.ptr<char>(), dst.step, sz);
    }
    else //if (src1.depth() == CV_32F)
    {
-        nppiAdd_32f_C1R((const Npp32f*)src1.ptr<float>(), src1.step,
-                        (const Npp32f*)src2.ptr<float>(), src2.step,
-                        (Npp32f*)dst.ptr<float>(), dst.step, sz);
+        nppiAbsDiff_32f_C1R((const Npp32f*)src1.ptr<float>(), src1.step,
+            (const Npp32f*)src2.ptr<float>(), src2.step,
+            (Npp32f*)dst.ptr<float>(), dst.step, sz);
    }
 }

+double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double /*maxVal*/, int thresholdType) // thresholds src into dst via nppiThreshold_32f_C1R
+{ 
+    CV_Assert(src.type() == CV_32FC1 && thresholdType == THRESH_TRUNC);
+    // Only CV_32FC1 input with THRESH_TRUNC is accepted; maxVal is unused (its name is commented out in the signature).
+    dst.create( src.size(), src.type() );
+
+    NppiSize sz;
+    sz.width  = src.cols;
+    sz.height = src.rows;
+    // thresh is narrowed to Npp32f and applied with NPP_CMP_GREATER; the returned NppStatus is not inspected.
+    nppiThreshold_32f_C1R((const Npp32f*)src.ptr<float>(), src.step, 
+        (Npp32f*)dst.ptr<float>(), dst.step, sz, (Npp32f)thresh, NPP_CMP_GREATER);
+    // The return value is the threshold exactly as it was passed in.
+    return thresh;
+}
+
+void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop) // runs nppiCompare_8u_C4R / nppiCompare_32f_C1R; dst is created as CV_8UC1
+{
+    // Size/type agreement is enforced in release builds too: the ROI comes from src1 but is applied to src2 as well.
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+    // cmpop indexes nppCmpOp below, so it must lie in [CMP_EQ, CMP_LE]; CMP_NE and any other value have no table entry.
+    CV_Assert((src1.type() == CV_8UC4 || src1.type() == CV_32FC1) && cmpop >= CMP_EQ && cmpop <= CMP_LE);
+    dst.create( src1.size(), CV_8UC1 );
+    // NOTE(review): table order assumes CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE are 0..4 in that order - confirm against the OpenCV headers.
+    static const NppCmpOp nppCmpOp[] = { NPP_CMP_EQ, NPP_CMP_GREATER, NPP_CMP_GREATER_EQ, NPP_CMP_LESS, NPP_CMP_LESS_EQ };
+
+    NppiSize sz;
+    sz.width  = src1.cols;
+    sz.height = src1.rows;
+
+    if (src1.depth() == CV_8U)
+    {
+        nppiCompare_8u_C4R((const Npp8u*)src1.ptr<char>(), src1.step, 
+            (const Npp8u*)src2.ptr<char>(), src2.step, 
+            (Npp8u*)dst.ptr<char>(), dst.step, sz, nppCmpOp[cmpop]);
+    }
+    else //if (src1.depth() == CV_32F)
+    {
+        nppiCompare_32f_C1R((const Npp32f*)src1.ptr<float>(), src1.step,
+            (const Npp32f*)src2.ptr<float>(), src2.step,
+            (Npp8u*)dst.ptr<char>(), dst.step, sz, nppCmpOp[cmpop]);
+    }
+}
+
+void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev) // fills mean and stddev via nppiMean_StdDev_8u_C1R
+{
+    CV_Assert(src.type() == CV_8UC1);
+    // Only CV_8UC1 input is accepted.
+    NppiSize sz;
+    sz.width  = src.cols;
+    sz.height = src.rows;
+    // mean.val and stddev.val are handed to NPP as the output buffers; the returned NppStatus is not inspected.
+    nppiMean_StdDev_8u_C1R((const Npp8u*)src.ptr<char>(), src.step, sz, mean.val, stddev.val);
+}
+
+double cv::gpu::norm(const GpuMat& src1, int normType) 
+{
+    return norm(src1, GpuMat(src1.size(), src1.type(), Scalar::all(0.0)), normType); // delegates to the two-matrix overload, passing a temporary all-zero matrix of the same size and type
+}
+
+double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) // returns the value produced by nppiNormDiff_{Inf,L1,L2}_8u_C1R for src1 and src2
+{
+    // Size/type agreement is enforced in release builds too: the ROI comes from src1 but is applied to src2 as well.
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+    CV_Assert((src1.type() == CV_8UC1) && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2));
+
+    typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, 
+        NppiSize oSizeROI, Npp64f* pRetVal);
+    // Indexed by normType >> 1; NOTE(review): assumes NORM_INF=1, NORM_L1=2, NORM_L2=4 so the indices are 0, 1, 2 - confirm.
+    static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+
+    NppiSize sz;
+    sz.width  = src1.cols;
+    sz.height = src1.rows;
+
+    int funcIdx = normType >> 1;
+    Npp64f retVal[3] = {0.0, 0.0, 0.0}; // zero-initialised: the NppStatus is not inspected, so a failed call must not yield an indeterminate result
+
+    npp_norm_diff_func[funcIdx]((const Npp8u*)src1.ptr<char>(), src1.step, 
+        (const Npp8u*)src2.ptr<char>(), src2.step, 
+        sz, retVal);
+
+    return retVal[0];
+}
+
 #endif /* !defined (HAVE_CUDA) */
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -55,7 +55,7 @@
 #include <vector>

 #include "opencv2/gpu/gpu.hpp"
-#include "opencv2/imgproc/types_c.h"
+#include "opencv2/imgproc/imgproc.hpp"

 #if defined(HAVE_CUDA)