Merge pull request #1575 from jet47:gpu-buffer-pool

This commit is contained in:
Roman Donchenko
2013-10-21 16:35:14 +04:00
committed by OpenCV Buildbot
9 changed files with 816 additions and 40 deletions

View File

@@ -61,16 +61,30 @@ namespace cv { namespace cuda {
class CV_EXPORTS GpuMat
{
public:
//! Abstract allocation interface used by GpuMat.
//! Concrete allocators implement allocate() and free(); a GpuMat keeps a
//! pointer to the Allocator it was constructed with.
class CV_EXPORTS Allocator
{
public:
//! virtual destructor so implementations can be destroyed through an Allocator*
virtual ~Allocator() {}
//! Provides storage for a rows x cols matrix described by mat.
//! The implementation must fill the data, step and refcount fields of mat.
//! NOTE(review): elemSize is presumably the size of one element in bytes and the
//! bool result presumably reports success -- confirm against the callers.
virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
//! Counterpart of allocate() for the given mat.
//! NOTE(review): presumably releases what allocate() filled in -- confirm against the implementations.
virtual void free(GpuMat* mat) = 0;
};
//! default allocator
static Allocator* defaultAllocator();
static void setDefaultAllocator(Allocator* allocator);
//! default constructor
GpuMat();
explicit GpuMat(Allocator* allocator = defaultAllocator());
//! constructs GpuMat of the specified size and type
GpuMat(int rows, int cols, int type);
GpuMat(Size size, int type);
GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
//! constructs GpuMat and fills it with the specified value s
GpuMat(int rows, int cols, int type, Scalar s);
GpuMat(Size size, int type, Scalar s);
GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
//! copy constructor
GpuMat(const GpuMat& m);
@@ -84,7 +98,7 @@ public:
GpuMat(const GpuMat& m, Rect roi);
//! builds GpuMat from host memory (Blocking call)
explicit GpuMat(InputArray arr);
explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
//! destructor - calls release()
~GpuMat();
@@ -249,6 +263,9 @@ public:
//! helper fields used in locateROI and adjustROI
uchar* datastart;
uchar* dataend;
//! allocator
Allocator* allocator;
};
//! creates continuous matrix
@@ -260,6 +277,10 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
//! BufferPool management (must be called before Stream creation)
CV_EXPORTS void setBufferPoolUsage(bool on);
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
//////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is a limited cv::Mat with page-locked memory allocation.
@@ -382,6 +403,7 @@ private:
Stream(const Ptr<Impl>& impl);
friend struct StreamAccessor;
friend class BufferPool;
};
class CV_EXPORTS Event

View File

@@ -51,29 +51,29 @@ namespace cv { namespace cuda {
//////////////////////////////// GpuMat ///////////////////////////////
inline
GpuMat::GpuMat()
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
GpuMat::GpuMat(Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
GpuMat::GpuMat(Size size_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (rows_ > 0 && cols_ > 0)
{
@@ -83,8 +83,8 @@ GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
}
inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (size_.height > 0 && size_.width > 0)
{
@@ -95,15 +95,15 @@ GpuMat::GpuMat(Size size_, int type_, Scalar s_)
inline
GpuMat::GpuMat(const GpuMat& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
{
if (refcount)
CV_XADD(refcount, 1);
}
inline
GpuMat::GpuMat(InputArray arr) :
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
upload(arr);
}

View File

@@ -90,6 +90,38 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The
namespace cv { namespace cuda
{
class MemoryStack;
//! GpuMat::Allocator implementation that is constructed for one CUDA stream.
//! NOTE(review): the name and the memStack_ member suggest stack-style allocation
//! from a MemoryStack; the implementation is not visible here -- confirm.
class CV_EXPORTS StackAllocator : public GpuMat::Allocator
{
public:
//! creates an allocator for the given CUDA stream
explicit StackAllocator(cudaStream_t stream);
~StackAllocator();
//! GpuMat::Allocator interface: must fill the data, step and refcount fields of mat
bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
//! GpuMat::Allocator interface: counterpart of allocate()
void free(GpuMat* mat);
private:
// copy constructor and copy assignment are declared private, so the class
// cannot be copied from outside
StackAllocator(const StackAllocator&);
StackAllocator& operator =(const StackAllocator&);
// NOTE(review): presumably the stream passed to the constructor -- confirm in the definition
cudaStream_t stream_;
// raw pointer to a MemoryStack; ownership is not visible from this declaration
MemoryStack* memStack_;
// NOTE(review): presumably the alignment applied to allocations -- units not visible here
size_t alignment_;
};
//! Hands out GpuMat buffers for a given Stream.
//! NOTE(review): the buffers presumably come from the allocator held in
//! allocator_; the constructor and the three-argument getBuffer are defined
//! elsewhere -- confirm there.
class CV_EXPORTS BufferPool
{
public:
    //! creates a pool for the given stream
    explicit BufferPool(Stream& stream);

    //! returns a GpuMat buffer with the requested rows, cols and type
    GpuMat getBuffer(int rows, int cols, int type);

    //! convenience overload: forwards size.height as rows and size.width as cols
    GpuMat getBuffer(Size size, int type)
    {
        const int bufRows = size.height;
        const int bufCols = size.width;
        return getBuffer(bufRows, bufCols, type);
    }

private:
    // allocator used by this pool; stored as a raw pointer
    GpuMat::Allocator* allocator_;
};
static inline void checkNppError(int code, const char* file, const int line, const char* func)
{
if (code < 0)