refactored CudaMem (alloc type is now assigned only in the constructor)

2013-04-16 14:39:42 +04:00
parent cc34a8ac3c
commit a52af84dcf
5 changed files with 275 additions and 233 deletions
--- a/modules/core/include/opencv2/core/gpu.hpp
+++ b/modules/core/include/opencv2/core/gpu.hpp
@@ -252,66 +252,59 @@ public:
    uchar* dataend;
 };

-//! Creates continuous GPU matrix
+//! creates continuous GPU matrix
 CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m);

-//! Ensures that size of the given matrix is not less than (rows, cols) size
+//! ensures that size of the given matrix is not less than (rows, cols) size
 //! and matrix type is match specified one too
 CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);

 CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);

 //////////////////////////////// CudaMem ////////////////////////////////
+
 // CudaMem is limited cv::Mat with page locked memory allocation.
 // Page locked memory is only needed for async and faster coping to GPU.
 // It is convertable to cv::Mat header without reference counting
 // so you can use it with other opencv functions.

-// Page-locks the matrix m memory and maps it for the device(s)
-CV_EXPORTS void registerPageLocked(Mat& m);
-
-// Unmaps the memory of matrix m, and makes it pageable again.
-CV_EXPORTS void unregisterPageLocked(Mat& m);
-
 class CV_EXPORTS CudaMem
 {
 public:
-    enum  { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 };
+    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
+
+    explicit CudaMem(AllocType alloc_type = PAGE_LOCKED);

-    CudaMem();
    CudaMem(const CudaMem& m);

-    CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED);
-    CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
+    CudaMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
+    CudaMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);

-
-    //! creates from cv::Mat with coping data
-    explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);
+    //! creates from host memory, copying the data
+    explicit CudaMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);

    ~CudaMem();

-    CudaMem& operator = (const CudaMem& m);
+    CudaMem& operator =(const CudaMem& m);
+
+    //! swaps contents with another CudaMem
+    void swap(CudaMem& b);

    //! returns deep copy of the matrix, i.e. the data is copied
    CudaMem clone() const;

    //! allocates new matrix data unless the matrix already has specified size and type.
-    void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED);
-    void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
+    void create(int rows, int cols, int type);
+    void create(Size size, int type);

    //! decrements reference counter and released memory if needed.
    void release();

    //! returns matrix header with disabled reference counting for CudaMem data.
    Mat createMatHeader() const;
-    operator Mat() const;

    //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
    GpuMat createGpuMatHeader() const;
-    operator GpuMat() const;
-
-    //returns if host memory can be mapperd to gpu address space;
-    static bool canMapHostMemory();

    // Please see cv::Mat for descriptions
    bool isContinuous() const;
@@ -324,7 +317,6 @@ public:
    Size size() const;
    bool empty() const;

-
    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
@@ -336,9 +328,14 @@ public:
    uchar* datastart;
    uchar* dataend;

-    int alloc_type;
+    AllocType alloc_type;
 };

+//! page-locks the matrix m memory and maps it for the device(s)
+CV_EXPORTS void registerPageLocked(Mat& m);
+
+//! unmaps the memory of matrix m, and makes it pageable again
+CV_EXPORTS void unregisterPageLocked(Mat& m);

 //////////////////////////////// CudaStream ////////////////////////////////
 // Encapculates Cuda Stream. Provides interface for async coping.
@@ -480,6 +477,10 @@ public:
    // Checks whether the GPU module can be run on the given device
    bool isCompatible() const;

+    bool canMapHostMemory() const;
+
+    size_t textureAlignment() const;
+
    int deviceID() const { return device_id_; }

 private:
--- a/modules/core/include/opencv2/core/gpu.inl.hpp
+++ b/modules/core/include/opencv2/core/gpu.inl.hpp
@@ -373,8 +373,161 @@ void swap(GpuMat& a, GpuMat& b)
    a.swap(b);
 }

+//////////////////////////////// CudaMem ////////////////////////////////
+
+// Builds an empty CudaMem: every header field is zeroed, only alloc_type comes from the argument.
+inline CudaMem::CudaMem(AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+}
+
+// Copies every header field from m (alloc_type included), then increments the reference counter if one is set.
+inline CudaMem::CudaMem(const CudaMem& m)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
+{
+    if (refcount != 0)
+        CV_XADD(refcount, 1);
+}
+
+// Starts empty with the requested alloc_type; create() is called only when both dimensions are positive.
+inline CudaMem::CudaMem(int rows_, int cols_, int type_, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    if (rows_ <= 0 || cols_ <= 0) return;
+    create(rows_, cols_, type_);
+}
+
+// Size flavour of the sizing constructor: height is passed as rows, width as cols; non-positive sizes stay empty.
+inline CudaMem::CudaMem(Size size_, int type_, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    if (size_.height <= 0 || size_.width <= 0) return;
+    create(size_.height, size_.width, type_);
+}
+
+// Starts empty with the requested alloc_type, then copies arr (obtained as a Mat via getMat()) into *this.
+inline CudaMem::CudaMem(InputArray arr, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    (arr.getMat()).copyTo(*this);
+}
+
+// Destruction does nothing beyond delegating to release().
+inline CudaMem::~CudaMem()
+{
+    this->release();
+}
+
+// Copy-and-swap assignment: a copy of m is swapped into *this (alloc_type included); the copy's
+// destructor then calls release() on what *this held before. Self-assignment is a no-op.
+inline CudaMem& CudaMem::operator =(const CudaMem& m)
+{
+    if (this == &m)
+        return *this;
+
+    CudaMem copy_of_m(m);
+    swap(copy_of_m);
+    return *this;
+}
+
+// Exchanges all nine header fields with b, one std::swap per field; alloc_type is exchanged as well.
+inline void CudaMem::swap(CudaMem& b)
+{
+    std::swap(alloc_type, b.alloc_type);
+    std::swap(refcount, b.refcount);
+    std::swap(dataend, b.dataend);
+    std::swap(datastart, b.datastart);
+    std::swap(data, b.data);
+    std::swap(step, b.step);
+    std::swap(cols, b.cols);
+    std::swap(rows, b.rows);
+    std::swap(flags, b.flags);
+}
+
+// Deep copy: constructs a CudaMem with this object's size, type and alloc_type, then copies the data into it.
+inline CudaMem CudaMem::clone() const
+{
+    CudaMem duplicate(size(), type(), alloc_type);
+    createMatHeader().copyTo(duplicate);
+    return duplicate;
+}
+
+// Size overload: forwards to create(rows, cols, type) with rows = height and cols = width.
+inline void CudaMem::create(Size size_, int type_)
+{
+    this->create(size_.height, size_.width, type_);
+}
+
+// Returns a Mat header built from this object's size(), type(), data pointer and step.
+inline Mat CudaMem::createMatHeader() const
+{
+    return Mat(this->size(), this->type(), data, step);
+}
+
+// True when the Mat::CONTINUOUS_FLAG bit is set in flags.
+inline bool CudaMem::isContinuous() const
+{
+    return 0 != (flags & Mat::CONTINUOUS_FLAG);
+}
+
+// Element size as computed by the CV_ELEM_SIZE macro from flags.
+inline size_t CudaMem::elemSize() const
+{
+    return static_cast<size_t>(CV_ELEM_SIZE(flags));
+}
+
+// Per-channel element size as computed by the CV_ELEM_SIZE1 macro from flags.
+inline size_t CudaMem::elemSize1() const
+{
+    return static_cast<size_t>(CV_ELEM_SIZE1(flags));
+}
+
+// Matrix type extracted from flags with CV_MAT_TYPE.
+inline int CudaMem::type() const
+{
+    return CV_MAT_TYPE(this->flags);
+}
+
+// Depth code extracted from flags with CV_MAT_DEPTH.
+inline int CudaMem::depth() const
+{
+    return CV_MAT_DEPTH(this->flags);
+}
+
+// Channel count extracted from flags with CV_MAT_CN.
+inline int CudaMem::channels() const
+{
+    return CV_MAT_CN(this->flags);
+}
+
+// step divided by elemSize1().
+inline size_t CudaMem::step1() const
+{
+    return step / this->elemSize1();
+}
+
+// Dimensions packed as Size(cols, rows).
+inline Size CudaMem::size() const
+{
+    return Size(this->cols, this->rows);
+}
+
+// True when the data pointer is null.
+inline bool CudaMem::empty() const
+{
+    return !data;
+}
+
+// Non-member swap for CudaMem; forwards to the member function a.swap(b).
+static inline void swap(CudaMem& a, CudaMem& b)
+{
+    a.swap(b);
+}
+
 }} // namespace cv { namespace gpu

+//////////////////////////////// Mat ////////////////////////////////
+
 namespace cv {

 inline