1) More convenient naming for the GPU samples.
2) Added mask support to the device 'transform' function.
3) Sample hog gpu: waitKey(1) -> waitKey(3); otherwise the image is not displayed.
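Not part of the commit: a minimal sketch of the kind of HighGUI display loop item 3 refers to, assuming a typical imshow/waitKey pairing (the window name and frame source below are invented). waitKey() both waits and pumps GUI events, so a 1 ms delay can return before the window is repainted, which is presumably why the sample moves to 3 ms.

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

int main()
{
    cv::VideoCapture cap(0);                 // hypothetical frame source; any producer works
    cv::Mat frame;
    cv::namedWindow("hog sample");           // invented window name
    while (cap.read(frame))
    {
        cv::imshow("hog sample", frame);
        if (cv::waitKey(3) == 27)            // 3 ms instead of 1 ms so the window actually repaints; Esc exits
            break;
    }
    return 0;
}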
@@ -217,7 +217,7 @@ namespace cv { namespace gpu { namespace mathfunc
     template <typename T1, typename T2>
     struct NotEqual
     {
-        __device__ uchar operator()(const T1& src1, const T2& src2, int, int)
+        __device__ uchar operator()(const T1& src1, const T2& src2)
         {
             return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
         }
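Not part of the diff: a small illustration of what the NotEqual change above means for element functors in general. With the unused coordinate parameters removed, an operator passed to transform() now receives only the pixel values. NotEqualLike below is an invented stand-in, not the library's functor; it compiles with nvcc.

#include <cstdio>

// Invented stand-in mirroring NotEqual: per-element comparison, no (x, y) arguments.
struct NotEqualLike
{
    __host__ __device__ unsigned char operator()(int a, int b) const
    {
        return static_cast<unsigned char>(static_cast<int>(a != b) * 255);
    }
};

int main()
{
    NotEqualLike op;
    std::printf("%d %d\n", op(3, 3), op(3, 4));  // prints "0 255"
    return 0;
}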
@@ -47,31 +47,49 @@
 
 namespace cv { namespace gpu { namespace device
 {
-    template <typename T, typename D, typename UnOp>
-    static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, UnOp op)
+    //! Mask accessor
+    template<class T> struct MaskReader_
+    {
+        PtrStep_<T> mask;
+        explicit MaskReader_(PtrStep_<T> mask): mask(mask) {}
+
+        __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
+    };
+
+    //! Stub mask accessor
+    struct NoMask
+    {
+        __device__ bool operator()(int y, int x) const { return true; }
+    };
+
+    //! Transform kernels
+
+    template <typename T, typename D, typename Mask, typename UnOp>
+    static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
     {
         const int x = blockDim.x * blockIdx.x + threadIdx.x;
         const int y = blockDim.y * blockIdx.y + threadIdx.y;
 
-        if (x < src.cols && y < src.rows)
+        if (x < src.cols && y < src.rows && mask(y, x))
         {
             T src_data = src.ptr(y)[x];
-            dst.ptr(y)[x] = op(src_data, x, y);
+            dst.ptr(y)[x] = op(src_data);
         }
     }
-    template <typename T1, typename T2, typename D, typename BinOp>
-    static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, BinOp op)
+
+    template <typename T1, typename T2, typename D, typename Mask, typename BinOp>
+    static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)
     {
         const int x = blockDim.x * blockIdx.x + threadIdx.x;
         const int y = blockDim.y * blockIdx.y + threadIdx.y;
 
-        if (x < src1.cols && y < src1.rows)
+        if (x < src1.cols && y < src1.rows && mask(y, x))
         {
             T1 src1_data = src1.ptr(y)[x];
             T2 src2_data = src2.ptr(y)[x];
-            dst.ptr(y)[x] = op(src1_data, src2_data, x, y);
+            dst.ptr(y)[x] = op(src1_data, src2_data);
         }
     }
 }}}
 
 namespace cv
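Not part of the commit: a self-contained, simplified analogue of the masked kernels above. It uses raw device pointers and invented types (NoMaskLike, MaskPtr, AddOne, transform_like) instead of OpenCV's DevMem2D_/PtrStep_, so it compiles on its own with nvcc. The point is only to show the mask-as-policy pattern: the predicate is evaluated per pixel, and a no-op predicate reproduces the unmasked path.

#include <cuda_runtime.h>

// Invented stand-ins for the mask accessors: a predicate that always passes,
// and one backed by an 8-bit mask image stored row-major with a given stride.
struct NoMaskLike
{
    __device__ bool operator()(int, int) const { return true; }
};

struct MaskPtr
{
    const unsigned char* data;
    size_t step;  // row stride in elements
    __device__ bool operator()(int y, int x) const { return data[y * step + x] != 0; }
};

struct AddOne
{
    __device__ unsigned char operator()(unsigned char v) const { return v + 1; }
};

// Simplified counterpart of the unary transform kernel: bounds check plus mask predicate.
template <typename T, typename D, typename Mask, typename UnOp>
__global__ void transform_like(const T* src, size_t src_step, D* dst, size_t dst_step,
                               int cols, int rows, Mask mask, UnOp op)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x < cols && y < rows && mask(y, x))
        dst[y * dst_step + x] = op(src[y * src_step + x]);
}

int main()
{
    const int cols = 64, rows = 64;
    unsigned char *d_src = 0, *d_dst = 0, *d_mask = 0;
    cudaMalloc((void**)&d_src,  cols * rows);
    cudaMalloc((void**)&d_dst,  cols * rows);
    cudaMalloc((void**)&d_mask, cols * rows);
    cudaMemset(d_src, 7, cols * rows);
    cudaMemset(d_dst, 0, cols * rows);
    cudaMemset(d_mask, 1, cols * rows);   // enable every pixel

    dim3 threads(16, 16);
    dim3 grid((cols + threads.x - 1) / threads.x, (rows + threads.y - 1) / threads.y);

    MaskPtr mask = { d_mask, (size_t)cols };
    transform_like<<<grid, threads>>>(d_src, (size_t)cols, d_dst, (size_t)cols,
                                      cols, rows, mask, AddOne());
    // Passing NoMaskLike() instead of 'mask' reproduces the unmasked behaviour.
    cudaDeviceSynchronize();

    cudaFree(d_src); cudaFree(d_dst); cudaFree(d_mask);
    return 0;
}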
@@ -87,7 +105,7 @@ namespace cv
         grid.x = divUp(src.cols, threads.x);
         grid.y = divUp(src.rows, threads.y);
 
-        device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, op);
+        device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
 
         if (stream == 0)
             cudaSafeCall( cudaThreadSynchronize() );
@@ -101,7 +119,7 @@ namespace cv
         grid.x = divUp(src1.cols, threads.x);
         grid.y = divUp(src1.rows, threads.y);
 
-        device::transform<T1, T2, D, BinOp><<<grid, threads, 0, stream>>>(src1, src2, dst, op);
+        device::transform<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, device::NoMask(), op);
 
         if (stream == 0)
             cudaSafeCall( cudaThreadSynchronize() );
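Not part of the commit: a host-side sketch (plain C++, invented names) of the calling convention the two hunks above switch to. The mask is an ordinary policy argument, so existing unmasked callers simply pass a no-op predicate and let the compiler inline it away, while a masked caller would pass a predicate backed by real data; everything else about the call stays the same.

#include <cstdio>
#include <vector>

struct NoMaskLike
{
    bool operator()(int, int) const { return true; }
};

struct VectorMask
{
    const std::vector<unsigned char>* m;
    int cols;
    bool operator()(int y, int x) const { return (*m)[y * cols + x] != 0; }
};

struct TimesTwo    { int operator()(int v) const { return v * 2; } };
struct PlusHundred { int operator()(int v) const { return v + 100; } };

// CPU stand-in for the GPU caller: the mask type is deduced like any other argument.
template <typename T, typename D, typename Mask, typename UnOp>
void transform_cpu(const std::vector<T>& src, std::vector<D>& dst,
                   int cols, int rows, Mask mask, UnOp op)
{
    for (int y = 0; y < rows; ++y)
        for (int x = 0; x < cols; ++x)
            if (mask(y, x))                       // same guard as in the kernels
                dst[y * cols + x] = op(src[y * cols + x]);
}

int main()
{
    std::vector<int> src(4, 5), dst(4, 0);
    std::vector<unsigned char> m(4);
    m[0] = 1; m[3] = 1;                           // only two pixels enabled

    transform_cpu(src, dst, 2, 2, NoMaskLike(), TimesTwo());   // unmasked: fills all of dst with 10

    VectorMask vm = { &m, 2 };
    transform_cpu(src, dst, 2, 2, vm, PlusHundred());          // masked: touches dst[0] and dst[3] only

    std::printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);  // prints "105 10 10 105"
    return 0;
}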