added MatPL implementation with zerocopy and write-combined memory

Andrey Morozov
2010-08-13 14:52:50 +00:00
parent 024283ceae
commit 35ebeb21bd
3 changed files with 97 additions and 45 deletions
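The new MatPL allocation modes introduced here (page-locked, zero-copy mapped, and write-combined) are selected through an extra argument to MatPL::create(). A rough usage sketch follows; the header path and the scoping of the ALLOC_* constants (assumed here to be MatPL members) are assumptions, not taken from the diff:

// Sketch only: assumes the MatPL API added by this commit is reachable
// through the gpu module header of this OpenCV tree.
#include <opencv2/gpu/gpu.hpp>

void zero_copy_example()
{
    using namespace cv::gpu;

    // Mapped (zero-copy) page-locked host memory.
    MatPL frame;
    frame.create(480, 640, CV_8UC1, MatPL::ALLOC_ZEROCOPY);

    // ... fill the buffer on the host ...

    // The conversion operator added below aliases the same physical memory
    // on the device, so no explicit upload is required.
    GpuMat d_frame = frame;

    // Write-combined memory: fast host writes and uploads, very slow host
    // reads, so it suits upload-only staging buffers.
    MatPL staging;
    staging.create(480, 640, CV_8UC3, MatPL::ALLOC_WRITE_COMBINED);
}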


@@ -67,7 +67,8 @@ namespace cv
void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
void GpuMat::release() { throw_nogpu(); }
void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
void MatPL::get_property_device() { throw_nogpu(); }
void MatPL::release() { throw_nogpu(); }
}
@@ -164,7 +165,7 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
else
impl::set_to_with_mask( *this, depth(), s.val, mask, channels());
return *this;
}
@@ -209,6 +210,15 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
return hdr;
}
bool cv::gpu::MatPL::can_device_map_to_host()
{
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
return prop.canMapHostMemory != 0;
}
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
{
_type &= TYPE_MASK;
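can_device_map_to_host() above is the standard canMapHostMemory query from the CUDA runtime. A standalone sketch of the same check, together with the cudaSetDeviceFlags(cudaDeviceMapHost) call that CUDA toolkits of this period expect before mapped host memory can be used (that extra step is not part of this diff):

#include <cuda_runtime.h>
#include <cstdio>

// True if device 0 can map page-locked host memory into its address space.
static bool device_supports_zero_copy()
{
    cudaDeviceProp prop;
    if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess)
        return false;
    return prop.canMapHostMemory != 0;
}

int main()
{
    if (!device_supports_zero_copy())
    {
        std::printf("zero-copy is not supported on this device\n");
        return 1;
    }

    // Must happen before the CUDA context is created, otherwise
    // cudaHostGetDevicePointer() may fail for mapped allocations.
    cudaSetDeviceFlags(cudaDeviceMapHost);
    std::printf("zero-copy is available\n");
    return 0;
}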
@@ -259,8 +269,9 @@ void cv::gpu::GpuMat::release()
//////////////////////////////// MatPL ////////////////////////////////
///////////////////////////////////////////////////////////////////////
void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
{
alloc_type = type_alloc;
_type &= TYPE_MASK;
if( rows == _rows && cols == _cols && type() == _type && data )
return;
@@ -281,7 +292,24 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
//datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
void *ptr;
cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );
switch (type_alloc)
{
case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY:
if (can_device_map_to_host())
{
cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) );
}
else
cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
break;
case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default:
cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
}
datastart = data = (uchar*)ptr;
dataend = data + nettosize;
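The switch above maps the three MatPL allocation types onto the corresponding cudaHostAlloc() flags. Stripped of the MatPL bookkeeping, the raw CUDA calls look like this (sketch; the buffer size and error handling are placeholders):

#include <cuda_runtime.h>
#include <cstddef>

// The three flavours of page-locked host memory used by MatPL::create().
void host_alloc_examples(size_t datasize)
{
    void *pinned = 0, *mapped = 0, *wc = 0;

    // ALLOC_PAGE_LOCKED: plain pinned memory, fastest cudaMemcpy transfers.
    cudaHostAlloc(&pinned, datasize, cudaHostAllocDefault);

    // ALLOC_ZEROCOPY: the device accesses this buffer directly through the
    // pointer returned by cudaHostGetDevicePointer().
    cudaHostAlloc(&mapped, datasize, cudaHostAllocMapped);

    // ALLOC_WRITE_COMBINED: faster PCIe uploads, but host reads are slow.
    cudaHostAlloc(&wc, datasize, cudaHostAllocWriteCombined);

    cudaFreeHost(pinned);
    cudaFreeHost(mapped);
    cudaFreeHost(wc);
}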
@@ -291,6 +319,19 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
}
}
inline MatPL::operator GpuMat() const
{
if (alloc_type == ALLOC_ZEROCOPY)
{
void *pdev;
cudaSafeCall( cudaHostGetDevicePointer( &pdev, this->data, 0 ) );
GpuMat m(this->rows, this->cols, this->type(), pdev, this->step);
return m;
}
else
cv::gpu::error("", __FILE__, __LINE__);
}
void cv::gpu::MatPL::release()
{
if( refcount && CV_XADD(refcount, -1) == 1 )