added CudaMem support to Input/Output arrays

2013-04-26 10:49:51 +04:00
parent 2fae1d9507
commit 6c253a41c2
3 changed files with 103 additions and 13 deletions
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -493,6 +493,8 @@ namespace ogl
 namespace gpu
 {
    class CV_EXPORTS GpuMat;
+    class CV_EXPORTS CudaMem;
+    class CV_EXPORTS Stream;
 }

 } // cv
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -77,6 +77,7 @@ public:
        STD_VECTOR_MAT    = 5 << KIND_SHIFT,
        EXPR              = 6 << KIND_SHIFT,
        OPENGL_BUFFER     = 7 << KIND_SHIFT,
+        CUDA_MEM          = 8 << KIND_SHIFT,
        GPU_MAT           = 9 << KIND_SHIFT
    };

@@ -93,6 +94,7 @@ public:
    _InputArray(const double& val);
    _InputArray(const gpu::GpuMat& d_mat);
    _InputArray(const ogl::Buffer& buf);
+    _InputArray(const gpu::CudaMem& cuda_mem);

    virtual Mat getMat(int i=-1) const;
    virtual void getMatVector(std::vector<Mat>& mv) const;
@@ -140,6 +142,7 @@ public:
    _OutputArray(std::vector<Mat>& vec);
    _OutputArray(gpu::GpuMat& d_mat);
    _OutputArray(ogl::Buffer& buf);
+    _OutputArray(gpu::CudaMem& cuda_mem);
    template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
    template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
    template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
@@ -151,6 +154,7 @@ public:
    _OutputArray(const std::vector<Mat>& vec);
    _OutputArray(const gpu::GpuMat& d_mat);
    _OutputArray(const ogl::Buffer& buf);
+    _OutputArray(const gpu::CudaMem& cuda_mem);
    template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
    template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
    template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
@@ -164,6 +168,7 @@ public:
    virtual Mat& getMatRef(int i=-1) const;
    virtual gpu::GpuMat& getGpuMatRef() const;
    virtual ogl::Buffer& getOGlBufferRef() const;
+    virtual gpu::CudaMem& getCudaMemRef() const;
    virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
    virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
    virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -941,13 +941,15 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
 \*************************************************************************************************/

 _InputArray::_InputArray() : flags(0), obj(0) {}
-_InputArray::~_InputArray() {}
 _InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {}
 _InputArray::_InputArray(const std::vector<Mat>& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {}
 _InputArray::_InputArray(const double& val) : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {}
 _InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {}
 _InputArray::_InputArray(const gpu::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {}
 _InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {}
+_InputArray::_InputArray(const gpu::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {}
+
+_InputArray::~_InputArray() {}

 Mat _InputArray::getMat(int i) const
 {
@@ -995,14 +997,37 @@ Mat _InputArray::getMat(int i) const
        return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
    }

-    CV_Assert( k == STD_VECTOR_MAT );
-    //if( k == STD_VECTOR_MAT )
+    if( k == STD_VECTOR_MAT )
    {
        const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
        CV_Assert( 0 <= i && i < (int)v.size() );

        return v[i];
    }
+
+    if( k == OPENGL_BUFFER )
+    {
+        CV_Assert( i < 0 );
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapHost/unmapHost methods for ogl::Buffer object");
+        return Mat();
+    }
+
+    if( k == GPU_MAT )
+    {
+        CV_Assert( i < 0 );
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for gpu::GpuMat object");
+        return Mat();
+    }
+
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+    {
+        CV_Assert( i < 0 );
+
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+
+        return cuda_mem->createMatHeader();
+    }
 }


@@ -1091,10 +1116,26 @@ gpu::GpuMat _InputArray::getGpuMat() const
 {
    int k = kind();

-    CV_Assert(k == GPU_MAT);
+    if (k == GPU_MAT)
+    {
+        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
+        return *d_mat;
+    }

-    const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
-    return *d_mat;
+    if (k == CUDA_MEM)
+    {
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        return cuda_mem->createGpuMatHeader();
+    }
+
+    if (k == OPENGL_BUFFER)
+    {
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapDevice/unmapDevice methods for ogl::Buffer object");
+        return gpu::GpuMat();
+    }
+
+    CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for gpu::GpuMat and gpu::CudaMem");
+    return gpu::GpuMat();
 }

 ogl::Buffer _InputArray::getOGlBuffer() const
@@ -1175,13 +1216,20 @@ Size _InputArray::size(int i) const
        return buf->size();
    }

-    CV_Assert( k == GPU_MAT );
-    //if( k == GPU_MAT )
+    if( k == GPU_MAT )
    {
        CV_Assert( i < 0 );
        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
        return d_mat->size();
    }
+
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+    {
+        CV_Assert( i < 0 );
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        return cuda_mem->size();
+    }
 }

 size_t _InputArray::total(int i) const
@@ -1234,9 +1282,12 @@ int _InputArray::type(int i) const
    if( k == OPENGL_BUFFER )
        return ((const ogl::Buffer*)obj)->type();

-    CV_Assert( k == GPU_MAT );
-    //if( k == GPU_MAT )
+    if( k == GPU_MAT )
        return ((const gpu::GpuMat*)obj)->type();
+
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+        return ((const gpu::CudaMem*)obj)->type();
 }

 int _InputArray::depth(int i) const
@@ -1286,24 +1337,29 @@ bool _InputArray::empty() const
    if( k == OPENGL_BUFFER )
        return ((const ogl::Buffer*)obj)->empty();

-    CV_Assert( k == GPU_MAT );
-    //if( k == GPU_MAT )
+    if( k == GPU_MAT )
        return ((const gpu::GpuMat*)obj)->empty();
+
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+        return ((const gpu::CudaMem*)obj)->empty();
 }


 _OutputArray::_OutputArray() {}
-_OutputArray::~_OutputArray() {}
 _OutputArray::_OutputArray(Mat& m) : _InputArray(m) {}
 _OutputArray::_OutputArray(std::vector<Mat>& vec) : _InputArray(vec) {}
 _OutputArray::_OutputArray(gpu::GpuMat& d_mat) : _InputArray(d_mat) {}
 _OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {}
+_OutputArray::_OutputArray(gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {}

 _OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;}
 _OutputArray::_OutputArray(const std::vector<Mat>& vec) : _InputArray(vec) {flags |= FIXED_SIZE;}
 _OutputArray::_OutputArray(const gpu::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;}
 _OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;}
+_OutputArray::_OutputArray(const gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;}

+_OutputArray::~_OutputArray() {}

 bool _OutputArray::fixedSize() const
 {
@@ -1339,6 +1395,13 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
        ((ogl::Buffer*)obj)->create(_sz, mtype);
        return;
    }
+    if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
+    {
+        CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == _sz);
+        CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
+        ((gpu::CudaMem*)obj)->create(_sz, mtype);
+        return;
+    }
    int sizes[] = {_sz.height, _sz.width};
    create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
 }
@@ -1367,6 +1430,13 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
        ((ogl::Buffer*)obj)->create(rows, cols, mtype);
        return;
    }
+    if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
+    {
+        CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == Size(cols, rows));
+        CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
+        ((gpu::CudaMem*)obj)->create(rows, cols, mtype);
+        return;
+    }
    int sizes[] = {rows, cols};
    create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
 }
@@ -1586,6 +1656,12 @@ void _OutputArray::release() const
        return;
    }

+    if( k == CUDA_MEM )
+    {
+        ((gpu::CudaMem*)obj)->release();
+        return;
+    }
+
    if( k == OPENGL_BUFFER )
    {
        ((ogl::Buffer*)obj)->release();
@@ -1664,6 +1740,13 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const
    return *(ogl::Buffer*)obj;
 }

+gpu::CudaMem& _OutputArray::getCudaMemRef() const
+{
+    int k = kind();
+    CV_Assert( k == CUDA_MEM );
+    return *(gpu::CudaMem*)obj;
+}
+
 static _OutputArray _none;
 OutputArray noArray() { return _none; }