modified according to CUDA 4.0 API updates

2011-05-31 08:31:10 +00:00
parent 98d663e7e0
commit 926a6bba00
40 changed files with 1134 additions and 1818 deletions
--- a/modules/gpu/src/cudastream.cpp
+++ b/modules/gpu/src/cudastream.cpp
@@ -64,6 +64,8 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { thro
 void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
 void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
 void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }
+Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; } // stub variant: calls throw_nogpu() first; the static Stream is only returned if that call comes back
+cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; } // stub variant: calls throw_nogpu() first; `return false` only matters if that call comes back

 #else /* !defined (HAVE_CUDA) */

@@ -117,7 +119,7 @@ namespace
    }
 }

-CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl->stream; };
+CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl ? stream.impl->stream : 0; } // returns the wrapped cudaStream_t, or 0 when the Stream has no Impl

 void cv::gpu::Stream::create()
 {
@@ -188,18 +190,35 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(sr
 void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)  { devcopy(src, dst, impl->stream,   cudaMemcpyHostToDevice); }
 void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }

-void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
+void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s) // enqueues a fill of src with s on this stream; asserts that CV_64F data is supported
 {
    CV_Assert((src.depth() != CV_64F) || 
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

+    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) // all-zero scalar: zero the bytes of each row, no kernel needed
+    {
+        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, StreamAccessor::getStream(*this)) );
+        return;
+    }
+    if (src.depth() == CV_8U)
+    {
+        int cn = src.channels();
+
+        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3])) // every used channel holds the same value, so one byte value fills the row
+        {
+            int val = saturate_cast<uchar>(s[0]); // clamp to the uchar range before using it as the memset byte
+            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, StreamAccessor::getStream(*this)) );
+            return;
+        }
+    }
+
    typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream);
    static const set_caller_t set_callers[] =
    {
        kernelSet<uchar>, kernelSet<schar>, kernelSet<ushort>, kernelSet<short>,
        kernelSet<int>, kernelSet<float>, kernelSet<double>
    };
-    set_callers[src.depth()](src, val, impl->stream);
+    set_callers[src.depth()](src, s, StreamAccessor::getStream(*this)); // general path, dispatched by depth; getStream yields 0 for a null impl (e.g. Stream::Null()) instead of dereferencing it
 }

 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
@@ -246,5 +265,17 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
    matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);
 }

+cv::gpu::Stream::operator bool() const
+{
+    return impl != 0 && impl->stream != 0; // true only when an Impl exists and holds a non-null cudaStream_t
+}
+
+cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_) {} // stores the given Impl pointer as-is; nothing else is done here
+
+cv::gpu::Stream& cv::gpu::Stream::Null()
+{
+    static Stream nullStream(static_cast<Impl*>(0)); // function-local static: one shared Stream built with a null Impl
+    return nullStream;
+}

 #endif /* !defined (HAVE_CUDA) */