Merge pull request #1575 from jet47:gpu-buffer-pool

2013-10-21 16:35:14 +04:00
parent 21233656bd 342e007dc6
commit 29f37fc130
9 changed files with 816 additions and 40 deletions
--- a/modules/core/include/opencv2/core/cuda.hpp
+++ b/modules/core/include/opencv2/core/cuda.hpp
@@ -61,16 +61,30 @@ namespace cv { namespace cuda {
 class CV_EXPORTS GpuMat
 {
 public:
+    class CV_EXPORTS Allocator
+    {
+    public:
+        virtual ~Allocator() {}
+
+        // allocate() must fill the data, step and refcount fields of mat
+        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
+        virtual void free(GpuMat* mat) = 0;
+    };
+
+    //! default allocator
+    static Allocator* defaultAllocator();
+    static void setDefaultAllocator(Allocator* allocator);
+
    //! default constructor
-    GpuMat();
+    explicit GpuMat(Allocator* allocator = defaultAllocator());

    //! constructs GpuMat of the specified size and type
-    GpuMat(int rows, int cols, int type);
-    GpuMat(Size size, int type);
+    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

    //! constucts GpuMat and fills it with the specified value _s
-    GpuMat(int rows, int cols, int type, Scalar s);
-    GpuMat(Size size, int type, Scalar s);
+    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());

    //! copy constructor
    GpuMat(const GpuMat& m);
@@ -84,7 +98,7 @@ public:
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
-    explicit GpuMat(InputArray arr);
+    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());

    //! destructor - calls release()
    ~GpuMat();
@@ -249,6 +263,9 @@ public:
    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    uchar* dataend;
+
+    //! allocator passed to the constructor (the copy constructor takes it from the source matrix)
+    Allocator* allocator;
 };

 //! creates continuous matrix
@@ -260,6 +277,10 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr

 CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);

+//! BufferPool management (must be called before Stream creation)
+CV_EXPORTS void setBufferPoolUsage(bool on);
+CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
+
 //////////////////////////////// CudaMem ////////////////////////////////

 // CudaMem is limited cv::Mat with page locked memory allocation.
@@ -382,6 +403,7 @@ private:
    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
+    friend class BufferPool;
 };

 class CV_EXPORTS Event
--- a/modules/core/include/opencv2/core/cuda.inl.hpp
+++ b/modules/core/include/opencv2/core/cuda.inl.hpp
@@ -51,29 +51,29 @@ namespace cv { namespace cuda {
 //////////////////////////////// GpuMat ///////////////////////////////

 inline
-GpuMat::GpuMat()
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {}

 inline
-GpuMat::GpuMat(int rows_, int cols_, int type_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
    if (rows_ > 0 && cols_ > 0)
        create(rows_, cols_, type_);
 }

 inline
-GpuMat::GpuMat(Size size_, int type_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
    if (size_.height > 0 && size_.width > 0)
        create(size_.height, size_.width, type_);
 }

 inline
-GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
    if (rows_ > 0 && cols_ > 0)
    {
@@ -83,8 +83,8 @@ GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
 }

 inline
-GpuMat::GpuMat(Size size_, int type_, Scalar s_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
    if (size_.height > 0 && size_.width > 0)
    {
@@ -95,15 +95,15 @@ GpuMat::GpuMat(Size size_, int type_, Scalar s_)

 inline
 GpuMat::GpuMat(const GpuMat& m)
-    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
 {
    if (refcount)
        CV_XADD(refcount, 1);
 }

 inline
-GpuMat::GpuMat(InputArray arr) :
-    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
+    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
    upload(arr);
 }
--- a/modules/core/include/opencv2/core/private.cuda.hpp
+++ b/modules/core/include/opencv2/core/private.cuda.hpp
@@ -90,6 +90,38 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The

 namespace cv { namespace cuda
 {
+    class MemoryStack;
+
+    class CV_EXPORTS StackAllocator : public GpuMat::Allocator
+    {
+    public:
+        explicit StackAllocator(cudaStream_t stream);
+        ~StackAllocator();
+
+        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
+        void free(GpuMat* mat);
+
+    private:
+        StackAllocator(const StackAllocator&);
+        StackAllocator& operator =(const StackAllocator&);
+
+        cudaStream_t stream_;
+        MemoryStack* memStack_;
+        size_t alignment_;
+    };
+
+    class CV_EXPORTS BufferPool
+    {
+    public:
+        explicit BufferPool(Stream& stream);
+
+        GpuMat getBuffer(int rows, int cols, int type);
+        GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
+
+    private:
+        GpuMat::Allocator* allocator_;
+    };
+
    static inline void checkNppError(int code, const char* file, const int line, const char* func)
    {
        if (code < 0)