Added MatPL implementation with zero-copy and write-combined allocation

2010-08-13 14:52:50 +00:00
parent 024283ceae
commit 35ebeb21bd
3 changed files with 97 additions and 45 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -233,17 +233,18 @@ namespace cv
        {
        public:
-            //Not supported.  Now behaviour is like ALLOC_DEFAULT.
+            //Supported.  ALLOC_PAGE_LOCKED (the default) behaves like the former ALLOC_DEFAULT.
-            //enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }
+            enum  { ALLOC_PAGE_LOCKED = 0, ALLOC_ZEROCOPY = 1, ALLOC_WRITE_COMBINED = 4 };
            MatPL();
            MatPL(const MatPL& m);
-            MatPL(int _rows, int _cols, int _type);
+            MatPL(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
-            MatPL(Size _size, int _type);
+            MatPL(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
            //! creates from cv::Mat, copying its data
-            explicit MatPL(const Mat& m);
+            explicit MatPL(const Mat& m, int type_alloc = ALLOC_PAGE_LOCKED);
            ~MatPL();
@@ -253,8 +254,8 @@ namespace cv
            MatPL clone() const;
            //! allocates new matrix data unless the matrix already has specified size and type.
-            void create(int _rows, int _cols, int _type);
+            void create(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
-            void create(Size _size, int _type);
+            void create(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
            //! decrements reference counter and releases memory if needed.
            void release();
@@ -263,6 +264,11 @@ namespace cv
            Mat createMatHeader() const;
            operator Mat() const;
            operator GpuMat() const;
            static bool can_device_map_to_host();
            // Please see cv::Mat for descriptions
            bool isContinuous() const;
            size_t elemSize() const;
@@ -274,16 +280,20 @@ namespace cv
            Size size() const;
            bool empty() const;
            // Please see cv::Mat for descriptions
            int flags;
            int rows, cols;
            size_t step;
            int alloc_type;
            uchar* data;
            int* refcount;
            uchar* datastart;
            uchar* dataend;
        };
        //////////////////////////////// CudaStream ////////////////////////////////
--- a/modules/gpu/include/opencv2/gpu/matrix_operations.hpp
+++ b/modules/gpu/include/opencv2/gpu/matrix_operations.hpp
@@ -343,29 +343,28 @@ static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }
 ///////////////////////////////////////////////////////////////////////
 // Default constructor: builds an empty header that owns no buffer.
 // alloc_type is set to ALLOC_PAGE_LOCKED so that operator= and operator GpuMat
 // never read an indeterminate value from a default-constructed object.
 inline MatPL::MatPL()  : flags(0), rows(0), cols(0), step(0), alloc_type(ALLOC_PAGE_LOCKED), data(0), refcount(0), datastart(0), dataend(0) {}
-inline MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+inline MatPL::MatPL(int _rows, int _cols, int _type, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
 {
    if( _rows > 0 && _cols > 0 )
-        create( _rows, _cols, _type );
+        create( _rows, _cols, _type , type_alloc);
 }
-inline MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+inline MatPL::MatPL(Size _size, int _type, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
 {
    if( _size.height > 0 && _size.width > 0 )
-        create( _size.height, _size.width, _type );
+        create( _size.height, _size.width, _type, type_alloc );
 }
 // Copy constructor: creates a second header that shares m's buffer.
 // Every field is copied, including datastart, dataend and alloc_type, which is
 // the same set of fields the assignment operator copies. The shared reference
 // counter, when present, is incremented.
 inline MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), alloc_type(m.alloc_type), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
 {
    if( refcount )
        CV_XADD(refcount, 1);
 }
-inline MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+inline MatPL::MatPL(const Mat& m, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
 {
    if( m.rows > 0 && m.cols > 0 )
-        create( m.size(), m.type() );
+        create( m.size(), m.type() , type_alloc);
    Mat tmp = createMatHeader();
    m.copyTo(tmp);
@@ -375,6 +374,7 @@ inline MatPL::~MatPL()
 {
    release();
 }
 inline MatPL& MatPL::operator = (const MatPL& m)
 {
    if( this != &m )
@@ -388,6 +388,7 @@ inline MatPL& MatPL::operator = (const MatPL& m)
        datastart = m.datastart;
        dataend = m.dataend;
        refcount = m.refcount;
        alloc_type = m.alloc_type;
    }
    return *this;
 }
@@ -401,7 +402,7 @@ inline MatPL MatPL::clone() const
    return m;
 }
-inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); }
+inline void MatPL::create(Size _size, int _type, int type_alloc) { create(_size.height, _size.width, _type, type_alloc); }
 //CPP void MatPL::create(int _rows, int _cols, int _type, int type_alloc);
 //CPP void MatPL::release();
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@@ -67,7 +67,8 @@ namespace cv
        void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
        void GpuMat::release() { throw_nogpu(); }
-        void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
+        void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
        // No-GPU stub for the static capability query declared in the class as
        // `static bool can_device_map_to_host()`; the trailing return only satisfies
        // the bool signature after throw_nogpu() has reported the error.
        bool MatPL::can_device_map_to_host() { throw_nogpu(); return false; }
        void MatPL::release() { throw_nogpu(); }
    }
@@ -209,6 +210,15 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
    return hdr;
 }
 bool cv::gpu::MatPL::can_device_map_to_host()
 {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, 0);
        return (prop.canMapHostMemory != 0) ? true : false;
 }
 void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
 {
    _type &= TYPE_MASK;
@@ -259,8 +269,9 @@ void cv::gpu::GpuMat::release()
 //////////////////////////////// MatPL ////////////////////////////////
 ///////////////////////////////////////////////////////////////////////
-void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
+void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
 {
    alloc_type = type_alloc;
    _type &= TYPE_MASK;
    if( rows == _rows && cols == _cols && type() == _type && data )
        return;
@@ -281,7 +292,24 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
        //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
        void *ptr;
-        cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );
+
        switch (type_alloc)
        {
            case ALLOC_PAGE_LOCKED:  cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
            case ALLOC_ZEROCOPY:
                if (can_device_map_to_host() == true)
                {
                    cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) );
                }
                else
                    cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
                break;
            case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
            default:
                cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
        }
        datastart = data =  (uchar*)ptr;
        dataend = data + nettosize;
@@ -291,6 +319,19 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
    }
 }
 // Wraps a zero-copy (mapped) host buffer in a GpuMat header without copying.
 // Only valid when the matrix was allocated with ALLOC_ZEROCOPY; any other
 // allocation type is reported through cv::gpu::error.
 // Returns a GpuMat with this matrix's rows, cols, type and step whose data
 // pointer is the device-side alias obtained from cudaHostGetDevicePointer.
 // NOTE(review): if this definition lives in a .cpp file, `inline` may make it
 // unavailable to other translation units - confirm where it is meant to be defined.
 inline MatPL::operator GpuMat() const
 {
    if (alloc_type != ALLOC_ZEROCOPY)
    {
        cv::gpu::error("MatPL was not allocated with ALLOC_ZEROCOPY and cannot be mapped to a GpuMat", __FILE__, __LINE__);
        return GpuMat(); // keeps every path returning a value after the error is reported
    }

    // cudaHostGetDevicePointer writes the device address through its first
    // argument, so it needs the address of a real pointer variable.
    void* devptr = 0;
    cudaSafeCall( cudaHostGetDevicePointer( &devptr, this->data, 0 ) );
    return GpuMat(this->rows, this->cols, this->type(), devptr, this->step);
 }
 void cv::gpu::MatPL::release()
 {
    if( refcount && CV_XADD(refcount, -1) == 1 )