From 6c253a41c275110dcadf844021a6c73e42d0401a Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 26 Apr 2013 10:49:51 +0400
Subject: [PATCH] added CudaMem support to Input/Output arrays

---
 modules/core/include/opencv2/core/base.hpp |   2 +
 modules/core/include/opencv2/core/mat.hpp  |   5 +
 modules/core/src/matrix.cpp                | 109 ++++++++++++++++++---
 3 files changed, 103 insertions(+), 13 deletions(-)
diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp
index eb635a76a..1bcaf4ef4 100644
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -493,6 +493,8 @@ namespace ogl
 namespace gpu
 {
     class CV_EXPORTS GpuMat;
+    class CV_EXPORTS CudaMem;
+    class CV_EXPORTS Stream;
 }
 
 } // cv
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index 2432c3b03..8c9b10cea 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -77,6 +77,7 @@ public:
         STD_VECTOR_MAT    = 5 << KIND_SHIFT,
         EXPR              = 6 << KIND_SHIFT,
         OPENGL_BUFFER     = 7 << KIND_SHIFT,
+        CUDA_MEM          = 8 << KIND_SHIFT,
         GPU_MAT           = 9 << KIND_SHIFT
     };
 
@@ -93,6 +94,7 @@ public:
     _InputArray(const double& val);
     _InputArray(const gpu::GpuMat& d_mat);
     _InputArray(const ogl::Buffer& buf);
+    _InputArray(const gpu::CudaMem& cuda_mem);
 
     virtual Mat getMat(int i=-1) const;
     virtual void getMatVector(std::vector<Mat>& mv) const;
@@ -140,6 +142,7 @@ public:
     _OutputArray(std::vector<Mat>& vec);
     _OutputArray(gpu::GpuMat& d_mat);
     _OutputArray(ogl::Buffer& buf);
+    _OutputArray(gpu::CudaMem& cuda_mem);
     template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
     template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
     template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
@@ -151,6 +154,7 @@ public:
     _OutputArray(const std::vector<Mat>& vec);
     _OutputArray(const gpu::GpuMat& d_mat);
     _OutputArray(const ogl::Buffer& buf);
+    _OutputArray(const gpu::CudaMem& cuda_mem);
     template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
     template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
     template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
@@ -164,6 +168,7 @@ public:
     virtual Mat& getMatRef(int i=-1) const;
     virtual gpu::GpuMat& getGpuMatRef() const;
     virtual ogl::Buffer& getOGlBufferRef() const;
+    virtual gpu::CudaMem& getCudaMemRef() const;
     virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
     virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
     virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index 7b3af5514..948c8d770 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -941,13 +941,15 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
 \*************************************************************************************************/
 
 _InputArray::_InputArray() : flags(0), obj(0) {}
-_InputArray::~_InputArray() {}
 _InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {}
 _InputArray::_InputArray(const std::vector<Mat>& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {}
 _InputArray::_InputArray(const double& val) : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {}
 _InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {}
 _InputArray::_InputArray(const gpu::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {}
 _InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {}
+_InputArray::_InputArray(const gpu::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {} // tags the array as CUDA_MEM and stores only the address of cuda_mem
+
+_InputArray::~_InputArray() {}
 
 Mat _InputArray::getMat(int i) const
 {
@@ -995,14 +997,37 @@ Mat _InputArray::getMat(int i) const
         return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
     }
 
-    CV_Assert( k == STD_VECTOR_MAT );
-    //if( k == STD_VECTOR_MAT )
+    if( k == STD_VECTOR_MAT )
     {
         const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
         CV_Assert( 0 <= i && i < (int)v.size() );
 
         return v[i];
     }
+
+    if( k == OPENGL_BUFFER )
+    {
+        CV_Assert( i < 0 );
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapHost/unmapHost methods for ogl::Buffer object");
+        return Mat();
+    }
+
+    if( k == GPU_MAT )
+    {
+        CV_Assert( i < 0 );
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for gpu::GpuMat object");
+        return Mat();
+    }
+
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+    {
+        CV_Assert( i < 0 );
+
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+
+        return cuda_mem->createMatHeader();
+    }
 }
 
 
@@ -1091,10 +1116,26 @@ gpu::GpuMat _InputArray::getGpuMat() const
 {
     int k = kind();
 
-    CV_Assert(k == GPU_MAT);
+    if (k == GPU_MAT) // dispatch on kind(); a wrapped gpu::GpuMat is returned by value
+    {
+        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
+        return *d_mat;
+    }
 
-    const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
-    return *d_mat;
+    if (k == CUDA_MEM) // a wrapped gpu::CudaMem yields whatever its createGpuMatHeader() returns
+    {
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        return cuda_mem->createGpuMatHeader();
+    }
+
+    if (k == OPENGL_BUFFER) // ogl::Buffer is rejected with StsNotImplemented; the message points to mapDevice/unmapDevice
+    {
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapDevice/unmapDevice methods for ogl::Buffer object");
+        return gpu::GpuMat(); // NOTE(review): presumably unreachable if CV_Error throws - confirm
+    }
+
+    CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for gpu::GpuMat and gpu::CudaMem"); // every other kind is reported as unsupported
+    return gpu::GpuMat(); // NOTE(review): presumably unreachable if CV_Error throws - confirm
 }
 
 ogl::Buffer _InputArray::getOGlBuffer() const
@@ -1175,13 +1216,20 @@ Size _InputArray::size(int i) const
         return buf->size();
     }
 
-    CV_Assert( k == GPU_MAT );
-    //if( k == GPU_MAT )
+    if( k == GPU_MAT )
     {
         CV_Assert( i < 0 );
         const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
         return d_mat->size();
     }
+
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+    {
+        CV_Assert( i < 0 );
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        return cuda_mem->size();
+    }
 }
 
 size_t _InputArray::total(int i) const
@@ -1234,9 +1282,12 @@ int _InputArray::type(int i) const
     if( k == OPENGL_BUFFER )
         return ((const ogl::Buffer*)obj)->type();
 
-    CV_Assert( k == GPU_MAT );
-    //if( k == GPU_MAT )
+    if( k == GPU_MAT )
         return ((const gpu::GpuMat*)obj)->type();
+
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+        return ((const gpu::CudaMem*)obj)->type();
 }
 
 int _InputArray::depth(int i) const
@@ -1286,24 +1337,29 @@ bool _InputArray::empty() const
     if( k == OPENGL_BUFFER )
         return ((const ogl::Buffer*)obj)->empty();
 
-    CV_Assert( k == GPU_MAT );
-    //if( k == GPU_MAT )
+    if( k == GPU_MAT )
         return ((const gpu::GpuMat*)obj)->empty();
+
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+        return ((const gpu::CudaMem*)obj)->empty();
 }
 
 
 _OutputArray::_OutputArray() {}
-_OutputArray::~_OutputArray() {}
 _OutputArray::_OutputArray(Mat& m) : _InputArray(m) {}
 _OutputArray::_OutputArray(std::vector<Mat>& vec) : _InputArray(vec) {}
 _OutputArray::_OutputArray(gpu::GpuMat& d_mat) : _InputArray(d_mat) {}
 _OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {}
+_OutputArray::_OutputArray(gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {} // delegates to the _InputArray constructor; adds no FIXED_* flags
 
 _OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;}
 _OutputArray::_OutputArray(const std::vector<Mat>& vec) : _InputArray(vec) {flags |= FIXED_SIZE;}
 _OutputArray::_OutputArray(const gpu::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;}
 _OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;}
+_OutputArray::_OutputArray(const gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;} // const overload: additionally sets FIXED_SIZE and FIXED_TYPE
 
+_OutputArray::~_OutputArray() {}
 
 bool _OutputArray::fixedSize() const
 {
@@ -1339,6 +1395,13 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
         ((ogl::Buffer*)obj)->create(_sz, mtype);
         return;
     }
+    if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
+    {
+        CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == _sz);
+        CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
+        ((gpu::CudaMem*)obj)->create(_sz, mtype);
+        return;
+    }
     int sizes[] = {_sz.height, _sz.width};
     create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
 }
@@ -1367,6 +1430,13 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
         ((ogl::Buffer*)obj)->create(rows, cols, mtype);
         return;
     }
+    if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
+    {
+        CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == Size(cols, rows));
+        CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
+        ((gpu::CudaMem*)obj)->create(rows, cols, mtype);
+        return;
+    }
     int sizes[] = {rows, cols};
     create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
 }
@@ -1586,6 +1656,12 @@ void _OutputArray::release() const
         return;
     }
 
+    if( k == CUDA_MEM )
+    {
+        ((gpu::CudaMem*)obj)->release();
+        return;
+    }
+
     if( k == OPENGL_BUFFER )
     {
         ((ogl::Buffer*)obj)->release();
@@ -1664,6 +1740,13 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const
     return *(ogl::Buffer*)obj;
 }
 
+gpu::CudaMem& _OutputArray::getCudaMemRef() const // returns the wrapped gpu::CudaMem by reference
+{
+    int k = kind();
+    CV_Assert( k == CUDA_MEM ); // any other kind fails the assertion before obj is reinterpreted
+    return *(gpu::CudaMem*)obj; // obj is cast back to gpu::CudaMem and dereferenced; no copy is made
+}
+
 static _OutputArray _none;
 OutputArray noArray() { return _none; }