From 6c253a41c275110dcadf844021a6c73e42d0401a Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 26 Apr 2013 10:49:51 +0400 Subject: [PATCH] added CudaMem support to Input/Output arrays --- modules/core/include/opencv2/core/base.hpp | 2 + modules/core/include/opencv2/core/mat.hpp | 5 + modules/core/src/matrix.cpp | 109 ++++++++++++++++++--- 3 files changed, 103 insertions(+), 13 deletions(-) diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index eb635a76a..1bcaf4ef4 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -493,6 +493,8 @@ namespace ogl namespace gpu { class CV_EXPORTS GpuMat; + class CV_EXPORTS CudaMem; + class CV_EXPORTS Stream; } } // cv diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 2432c3b03..8c9b10cea 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -77,6 +77,7 @@ public: STD_VECTOR_MAT = 5 << KIND_SHIFT, EXPR = 6 << KIND_SHIFT, OPENGL_BUFFER = 7 << KIND_SHIFT, + CUDA_MEM = 8 << KIND_SHIFT, GPU_MAT = 9 << KIND_SHIFT }; @@ -93,6 +94,7 @@ public: _InputArray(const double& val); _InputArray(const gpu::GpuMat& d_mat); _InputArray(const ogl::Buffer& buf); + _InputArray(const gpu::CudaMem& cuda_mem); virtual Mat getMat(int i=-1) const; virtual void getMatVector(std::vector& mv) const; @@ -140,6 +142,7 @@ public: _OutputArray(std::vector& vec); _OutputArray(gpu::GpuMat& d_mat); _OutputArray(ogl::Buffer& buf); + _OutputArray(gpu::CudaMem& cuda_mem); template _OutputArray(std::vector<_Tp>& vec); template _OutputArray(std::vector >& vec); template _OutputArray(std::vector >& vec); @@ -151,6 +154,7 @@ public: _OutputArray(const std::vector& vec); _OutputArray(const gpu::GpuMat& d_mat); _OutputArray(const ogl::Buffer& buf); + _OutputArray(const gpu::CudaMem& cuda_mem); template _OutputArray(const std::vector<_Tp>& vec); template _OutputArray(const std::vector >& vec); template _OutputArray(const std::vector >& vec); @@ -164,6 +168,7 @@ public: virtual Mat& getMatRef(int i=-1) const; virtual gpu::GpuMat& getGpuMatRef() const; virtual ogl::Buffer& getOGlBufferRef() const; + virtual gpu::CudaMem& getCudaMemRef() const; virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 7b3af5514..948c8d770 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -941,13 +941,15 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) \*************************************************************************************************/ _InputArray::_InputArray() : flags(0), obj(0) {} -_InputArray::~_InputArray() {} _InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {} _InputArray::_InputArray(const std::vector& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {} _InputArray::_InputArray(const double& val) : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {} _InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {} _InputArray::_InputArray(const gpu::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {} _InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {} +_InputArray::_InputArray(const gpu::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {} + +_InputArray::~_InputArray() {} Mat _InputArray::getMat(int i) const { @@ -995,14 +997,37 @@ Mat _InputArray::getMat(int i) const return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat(); } - CV_Assert( k == STD_VECTOR_MAT ); - //if( k == STD_VECTOR_MAT ) + if( k == STD_VECTOR_MAT ) { const std::vector& v = *(const std::vector*)obj; CV_Assert( 0 <= i && i < (int)v.size() ); return v[i]; } + + if( k == OPENGL_BUFFER ) + { + CV_Assert( i < 0 ); + CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapHost/unmapHost methods for ogl::Buffer object"); + return Mat(); + } + + if( k == GPU_MAT ) + { + CV_Assert( i < 0 ); + CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for gpu::GpuMat object"); + return Mat(); + } + + CV_Assert( k == CUDA_MEM ); + //if( k == CUDA_MEM ) + { + CV_Assert( i < 0 ); + + const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj; + + return cuda_mem->createMatHeader(); + } } @@ -1091,10 +1116,26 @@ gpu::GpuMat _InputArray::getGpuMat() const { int k = kind(); - CV_Assert(k == GPU_MAT); + if (k == GPU_MAT) + { + const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj; + return *d_mat; + } - const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj; - return *d_mat; + if (k == CUDA_MEM) + { + const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj; + return cuda_mem->createGpuMatHeader(); + } + + if (k == OPENGL_BUFFER) + { + CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapDevice/unmapDevice methods for ogl::Buffer object"); + return gpu::GpuMat(); + } + + CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for gpu::GpuMat and gpu::CudaMem"); + return gpu::GpuMat(); } ogl::Buffer _InputArray::getOGlBuffer() const @@ -1175,13 +1216,20 @@ Size _InputArray::size(int i) const return buf->size(); } - CV_Assert( k == GPU_MAT ); - //if( k == GPU_MAT ) + if( k == GPU_MAT ) { CV_Assert( i < 0 ); const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj; return d_mat->size(); } + + CV_Assert( k == CUDA_MEM ); + //if( k == CUDA_MEM ) + { + CV_Assert( i < 0 ); + const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj; + return cuda_mem->size(); + } } size_t _InputArray::total(int i) const @@ -1234,9 +1282,12 @@ int _InputArray::type(int i) const if( k == OPENGL_BUFFER ) return ((const ogl::Buffer*)obj)->type(); - CV_Assert( k == GPU_MAT ); - //if( k == GPU_MAT ) + if( k == GPU_MAT ) return ((const gpu::GpuMat*)obj)->type(); + + CV_Assert( k == CUDA_MEM ); + //if( k == CUDA_MEM ) + return ((const gpu::CudaMem*)obj)->type(); } int _InputArray::depth(int i) const @@ -1286,24 +1337,29 @@ bool _InputArray::empty() const if( k == OPENGL_BUFFER ) return ((const ogl::Buffer*)obj)->empty(); - CV_Assert( k == GPU_MAT ); - //if( k == GPU_MAT ) + if( k == GPU_MAT ) return ((const gpu::GpuMat*)obj)->empty(); + + CV_Assert( k == CUDA_MEM ); + //if( k == CUDA_MEM ) + return ((const gpu::CudaMem*)obj)->empty(); } _OutputArray::_OutputArray() {} -_OutputArray::~_OutputArray() {} _OutputArray::_OutputArray(Mat& m) : _InputArray(m) {} _OutputArray::_OutputArray(std::vector& vec) : _InputArray(vec) {} _OutputArray::_OutputArray(gpu::GpuMat& d_mat) : _InputArray(d_mat) {} _OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {} +_OutputArray::_OutputArray(gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {} _OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;} _OutputArray::_OutputArray(const std::vector& vec) : _InputArray(vec) {flags |= FIXED_SIZE;} _OutputArray::_OutputArray(const gpu::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;} _OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;} +_OutputArray::_OutputArray(const gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;} +_OutputArray::~_OutputArray() {} bool _OutputArray::fixedSize() const { @@ -1339,6 +1395,13 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int ((ogl::Buffer*)obj)->create(_sz, mtype); return; } + if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) + { + CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == _sz); + CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype); + ((gpu::CudaMem*)obj)->create(_sz, mtype); + return; + } int sizes[] = {_sz.height, _sz.width}; create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); } @@ -1367,6 +1430,13 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp ((ogl::Buffer*)obj)->create(rows, cols, mtype); return; } + if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) + { + CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == Size(cols, rows)); + CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype); + ((gpu::CudaMem*)obj)->create(rows, cols, mtype); + return; + } int sizes[] = {rows, cols}; create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); } @@ -1586,6 +1656,12 @@ void _OutputArray::release() const return; } + if( k == CUDA_MEM ) + { + ((gpu::CudaMem*)obj)->release(); + return; + } + if( k == OPENGL_BUFFER ) { ((ogl::Buffer*)obj)->release(); @@ -1664,6 +1740,13 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const return *(ogl::Buffer*)obj; } +gpu::CudaMem& _OutputArray::getCudaMemRef() const +{ + int k = kind(); + CV_Assert( k == CUDA_MEM ); + return *(gpu::CudaMem*)obj; +} + static _OutputArray _none; OutputArray noArray() { return _none; }