Merge pull request #1575 from jet47:gpu-buffer-pool
This commit is contained in:
commit
29f37fc130
@ -61,16 +61,30 @@ namespace cv { namespace cuda {
|
|||||||
class CV_EXPORTS GpuMat
|
class CV_EXPORTS GpuMat
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
class CV_EXPORTS Allocator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~Allocator() {}
|
||||||
|
|
||||||
|
// allocator must fill data, step and refcount fields
|
||||||
|
virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
|
||||||
|
virtual void free(GpuMat* mat) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
//! default allocator
|
||||||
|
static Allocator* defaultAllocator();
|
||||||
|
static void setDefaultAllocator(Allocator* allocator);
|
||||||
|
|
||||||
//! default constructor
|
//! default constructor
|
||||||
GpuMat();
|
explicit GpuMat(Allocator* allocator = defaultAllocator());
|
||||||
|
|
||||||
//! constructs GpuMat of the specified size and type
|
//! constructs GpuMat of the specified size and type
|
||||||
GpuMat(int rows, int cols, int type);
|
GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
|
||||||
GpuMat(Size size, int type);
|
GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
|
||||||
|
|
||||||
//! constucts GpuMat and fills it with the specified value _s
|
//! constucts GpuMat and fills it with the specified value _s
|
||||||
GpuMat(int rows, int cols, int type, Scalar s);
|
GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
|
||||||
GpuMat(Size size, int type, Scalar s);
|
GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
|
||||||
|
|
||||||
//! copy constructor
|
//! copy constructor
|
||||||
GpuMat(const GpuMat& m);
|
GpuMat(const GpuMat& m);
|
||||||
@ -84,7 +98,7 @@ public:
|
|||||||
GpuMat(const GpuMat& m, Rect roi);
|
GpuMat(const GpuMat& m, Rect roi);
|
||||||
|
|
||||||
//! builds GpuMat from host memory (Blocking call)
|
//! builds GpuMat from host memory (Blocking call)
|
||||||
explicit GpuMat(InputArray arr);
|
explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
|
||||||
|
|
||||||
//! destructor - calls release()
|
//! destructor - calls release()
|
||||||
~GpuMat();
|
~GpuMat();
|
||||||
@ -249,6 +263,9 @@ public:
|
|||||||
//! helper fields used in locateROI and adjustROI
|
//! helper fields used in locateROI and adjustROI
|
||||||
uchar* datastart;
|
uchar* datastart;
|
||||||
uchar* dataend;
|
uchar* dataend;
|
||||||
|
|
||||||
|
//! allocator
|
||||||
|
Allocator* allocator;
|
||||||
};
|
};
|
||||||
|
|
||||||
//! creates continuous matrix
|
//! creates continuous matrix
|
||||||
@ -260,6 +277,10 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr
|
|||||||
|
|
||||||
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
|
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
|
||||||
|
|
||||||
|
//! BufferPool management (must be called before Stream creation)
|
||||||
|
CV_EXPORTS void setBufferPoolUsage(bool on);
|
||||||
|
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
|
||||||
|
|
||||||
//////////////////////////////// CudaMem ////////////////////////////////
|
//////////////////////////////// CudaMem ////////////////////////////////
|
||||||
|
|
||||||
// CudaMem is limited cv::Mat with page locked memory allocation.
|
// CudaMem is limited cv::Mat with page locked memory allocation.
|
||||||
@ -382,6 +403,7 @@ private:
|
|||||||
Stream(const Ptr<Impl>& impl);
|
Stream(const Ptr<Impl>& impl);
|
||||||
|
|
||||||
friend struct StreamAccessor;
|
friend struct StreamAccessor;
|
||||||
|
friend class BufferPool;
|
||||||
};
|
};
|
||||||
|
|
||||||
class CV_EXPORTS Event
|
class CV_EXPORTS Event
|
||||||
|
@ -51,29 +51,29 @@ namespace cv { namespace cuda {
|
|||||||
//////////////////////////////// GpuMat ///////////////////////////////
|
//////////////////////////////// GpuMat ///////////////////////////////
|
||||||
|
|
||||||
inline
|
inline
|
||||||
GpuMat::GpuMat()
|
GpuMat::GpuMat(Allocator* allocator_)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
GpuMat::GpuMat(int rows_, int cols_, int type_)
|
GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
{
|
{
|
||||||
if (rows_ > 0 && cols_ > 0)
|
if (rows_ > 0 && cols_ > 0)
|
||||||
create(rows_, cols_, type_);
|
create(rows_, cols_, type_);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
GpuMat::GpuMat(Size size_, int type_)
|
GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
{
|
{
|
||||||
if (size_.height > 0 && size_.width > 0)
|
if (size_.height > 0 && size_.width > 0)
|
||||||
create(size_.height, size_.width, type_);
|
create(size_.height, size_.width, type_);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
|
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
{
|
{
|
||||||
if (rows_ > 0 && cols_ > 0)
|
if (rows_ > 0 && cols_ > 0)
|
||||||
{
|
{
|
||||||
@ -83,8 +83,8 @@ GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
GpuMat::GpuMat(Size size_, int type_, Scalar s_)
|
GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
{
|
{
|
||||||
if (size_.height > 0 && size_.width > 0)
|
if (size_.height > 0 && size_.width > 0)
|
||||||
{
|
{
|
||||||
@ -95,15 +95,15 @@ GpuMat::GpuMat(Size size_, int type_, Scalar s_)
|
|||||||
|
|
||||||
inline
|
inline
|
||||||
GpuMat::GpuMat(const GpuMat& m)
|
GpuMat::GpuMat(const GpuMat& m)
|
||||||
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
|
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
|
||||||
{
|
{
|
||||||
if (refcount)
|
if (refcount)
|
||||||
CV_XADD(refcount, 1);
|
CV_XADD(refcount, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
GpuMat::GpuMat(InputArray arr) :
|
GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
|
||||||
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
{
|
{
|
||||||
upload(arr);
|
upload(arr);
|
||||||
}
|
}
|
||||||
|
@ -90,6 +90,38 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The
|
|||||||
|
|
||||||
namespace cv { namespace cuda
|
namespace cv { namespace cuda
|
||||||
{
|
{
|
||||||
|
class MemoryStack;
|
||||||
|
|
||||||
|
class CV_EXPORTS StackAllocator : public GpuMat::Allocator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit StackAllocator(cudaStream_t stream);
|
||||||
|
~StackAllocator();
|
||||||
|
|
||||||
|
bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
|
||||||
|
void free(GpuMat* mat);
|
||||||
|
|
||||||
|
private:
|
||||||
|
StackAllocator(const StackAllocator&);
|
||||||
|
StackAllocator& operator =(const StackAllocator&);
|
||||||
|
|
||||||
|
cudaStream_t stream_;
|
||||||
|
MemoryStack* memStack_;
|
||||||
|
size_t alignment_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CV_EXPORTS BufferPool
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit BufferPool(Stream& stream);
|
||||||
|
|
||||||
|
GpuMat getBuffer(int rows, int cols, int type);
|
||||||
|
GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
GpuMat::Allocator* allocator_;
|
||||||
|
};
|
||||||
|
|
||||||
static inline void checkNppError(int code, const char* file, const int line, const char* func)
|
static inline void checkNppError(int code, const char* file, const int line, const char* func)
|
||||||
{
|
{
|
||||||
if (code < 0)
|
if (code < 0)
|
||||||
|
@ -55,6 +55,54 @@ using namespace cv;
|
|||||||
using namespace cv::cuda;
|
using namespace cv::cuda;
|
||||||
using namespace cv::cudev;
|
using namespace cv::cudev;
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
class DefaultAllocator : public GpuMat::Allocator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
|
||||||
|
void free(GpuMat* mat);
|
||||||
|
};
|
||||||
|
|
||||||
|
bool DefaultAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
|
||||||
|
{
|
||||||
|
if (rows > 1 && cols > 1)
|
||||||
|
{
|
||||||
|
CV_CUDEV_SAFE_CALL( cudaMallocPitch(&mat->data, &mat->step, elemSize * cols, rows) );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Single row or single column must be continuous
|
||||||
|
CV_CUDEV_SAFE_CALL( cudaMalloc(&mat->data, elemSize * cols * rows) );
|
||||||
|
mat->step = elemSize * cols;
|
||||||
|
}
|
||||||
|
|
||||||
|
mat->refcount = (int*) fastMalloc(sizeof(int));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DefaultAllocator::free(GpuMat* mat)
|
||||||
|
{
|
||||||
|
cudaFree(mat->datastart);
|
||||||
|
fastFree(mat->refcount);
|
||||||
|
}
|
||||||
|
|
||||||
|
DefaultAllocator cudaDefaultAllocator;
|
||||||
|
GpuMat::Allocator* g_defaultAllocator = &cudaDefaultAllocator;
|
||||||
|
}
|
||||||
|
|
||||||
|
GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
|
||||||
|
{
|
||||||
|
return g_defaultAllocator;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator)
|
||||||
|
{
|
||||||
|
CV_Assert( allocator != 0 );
|
||||||
|
g_defaultAllocator = allocator;
|
||||||
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
/// create
|
/// create
|
||||||
|
|
||||||
@ -76,19 +124,16 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
|
|||||||
rows = _rows;
|
rows = _rows;
|
||||||
cols = _cols;
|
cols = _cols;
|
||||||
|
|
||||||
size_t esz = elemSize();
|
const size_t esz = elemSize();
|
||||||
|
|
||||||
void* devPtr;
|
bool allocSuccess = allocator->allocate(this, rows, cols, esz);
|
||||||
|
|
||||||
if (rows > 1 && cols > 1)
|
if (!allocSuccess)
|
||||||
{
|
{
|
||||||
CV_CUDEV_SAFE_CALL( cudaMallocPitch(&devPtr, &step, esz * cols, rows) );
|
// custom allocator fails, try default allocator
|
||||||
}
|
allocator = defaultAllocator();
|
||||||
else
|
allocSuccess = allocator->allocate(this, rows, cols, esz);
|
||||||
{
|
CV_Assert( allocSuccess );
|
||||||
// Single row or single column must be continuous
|
|
||||||
CV_CUDEV_SAFE_CALL( cudaMalloc(&devPtr, esz * cols * rows) );
|
|
||||||
step = esz * cols;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (esz * cols == step)
|
if (esz * cols == step)
|
||||||
@ -97,11 +142,11 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
|
|||||||
int64 _nettosize = static_cast<int64>(step) * rows;
|
int64 _nettosize = static_cast<int64>(step) * rows;
|
||||||
size_t nettosize = static_cast<size_t>(_nettosize);
|
size_t nettosize = static_cast<size_t>(_nettosize);
|
||||||
|
|
||||||
datastart = data = static_cast<uchar*>(devPtr);
|
datastart = data;
|
||||||
dataend = data + nettosize;
|
dataend = data + nettosize;
|
||||||
|
|
||||||
refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
|
if (refcount)
|
||||||
*refcount = 1;
|
*refcount = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,11 +155,10 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
|
|||||||
|
|
||||||
void cv::cuda::GpuMat::release()
|
void cv::cuda::GpuMat::release()
|
||||||
{
|
{
|
||||||
|
CV_DbgAssert( allocator != 0 );
|
||||||
|
|
||||||
if (refcount && CV_XADD(refcount, -1) == 1)
|
if (refcount && CV_XADD(refcount, -1) == 1)
|
||||||
{
|
allocator->free(this);
|
||||||
cudaFree(datastart);
|
|
||||||
fastFree(refcount);
|
|
||||||
}
|
|
||||||
|
|
||||||
data = datastart = dataend = 0;
|
data = datastart = dataend = 0;
|
||||||
step = rows = cols = 0;
|
step = rows = cols = 0;
|
||||||
|
418
modules/core/src/cuda_buffer_pool.cpp
Normal file
418
modules/core/src/cuda_buffer_pool.cpp
Normal file
@ -0,0 +1,418 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::cuda;
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
|
||||||
|
#include "opencv2/cudev/common.hpp"
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
/// MemoryStack
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
class MemoryPool;
|
||||||
|
}
|
||||||
|
|
||||||
|
class cv::cuda::MemoryStack
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
uchar* requestMemory(size_t size);
|
||||||
|
void returnMemory(uchar* ptr);
|
||||||
|
|
||||||
|
uchar* datastart;
|
||||||
|
uchar* dataend;
|
||||||
|
uchar* tip;
|
||||||
|
|
||||||
|
bool isFree;
|
||||||
|
MemoryPool* pool;
|
||||||
|
|
||||||
|
#if defined(DEBUG) || defined(_DEBUG)
|
||||||
|
std::vector<size_t> allocations;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
uchar* cv::cuda::MemoryStack::requestMemory(size_t size)
|
||||||
|
{
|
||||||
|
const size_t freeMem = dataend - tip;
|
||||||
|
|
||||||
|
if (size > freeMem)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
uchar* ptr = tip;
|
||||||
|
|
||||||
|
tip += size;
|
||||||
|
|
||||||
|
#if defined(DEBUG) || defined(_DEBUG)
|
||||||
|
allocations.push_back(size);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::cuda::MemoryStack::returnMemory(uchar* ptr)
|
||||||
|
{
|
||||||
|
CV_DbgAssert( ptr >= datastart && ptr < dataend );
|
||||||
|
|
||||||
|
#if defined(DEBUG) || defined(_DEBUG)
|
||||||
|
const size_t allocSize = tip - ptr;
|
||||||
|
CV_Assert( allocSize == allocations.back() );
|
||||||
|
allocations.pop_back();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
tip = ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
/// MemoryPool
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
class MemoryPool
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
MemoryPool();
|
||||||
|
|
||||||
|
void initialize(size_t stackSize, int stackCount);
|
||||||
|
void release();
|
||||||
|
|
||||||
|
MemoryStack* getFreeMemStack();
|
||||||
|
void returnMemStack(MemoryStack* memStack);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void initilizeImpl();
|
||||||
|
|
||||||
|
Mutex mtx_;
|
||||||
|
|
||||||
|
bool initialized_;
|
||||||
|
size_t stackSize_;
|
||||||
|
int stackCount_;
|
||||||
|
|
||||||
|
uchar* mem_;
|
||||||
|
|
||||||
|
std::vector<MemoryStack> stacks_;
|
||||||
|
};
|
||||||
|
|
||||||
|
MemoryPool::MemoryPool() : initialized_(false), mem_(0)
|
||||||
|
{
|
||||||
|
// default : 10 Mb, 5 stacks
|
||||||
|
stackSize_ = 10 * 1024 * 1024;
|
||||||
|
stackCount_ = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryPool::initialize(size_t stackSize, int stackCount)
|
||||||
|
{
|
||||||
|
AutoLock lock(mtx_);
|
||||||
|
|
||||||
|
release();
|
||||||
|
|
||||||
|
stackSize_ = stackSize;
|
||||||
|
stackCount_ = stackCount;
|
||||||
|
|
||||||
|
initilizeImpl();
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryPool::initilizeImpl()
|
||||||
|
{
|
||||||
|
const size_t totalSize = stackSize_ * stackCount_;
|
||||||
|
|
||||||
|
if (totalSize > 0)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaMalloc(&mem_, totalSize);
|
||||||
|
if (err != cudaSuccess)
|
||||||
|
return;
|
||||||
|
|
||||||
|
stacks_.resize(stackCount_);
|
||||||
|
|
||||||
|
uchar* ptr = mem_;
|
||||||
|
|
||||||
|
for (int i = 0; i < stackCount_; ++i)
|
||||||
|
{
|
||||||
|
stacks_[i].datastart = ptr;
|
||||||
|
stacks_[i].dataend = ptr + stackSize_;
|
||||||
|
stacks_[i].tip = ptr;
|
||||||
|
stacks_[i].isFree = true;
|
||||||
|
stacks_[i].pool = this;
|
||||||
|
|
||||||
|
ptr += stackSize_;
|
||||||
|
}
|
||||||
|
|
||||||
|
initialized_ = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryPool::release()
|
||||||
|
{
|
||||||
|
if (mem_)
|
||||||
|
{
|
||||||
|
#if defined(DEBUG) || defined(_DEBUG)
|
||||||
|
for (int i = 0; i < stackCount_; ++i)
|
||||||
|
{
|
||||||
|
CV_DbgAssert( stacks_[i].isFree );
|
||||||
|
CV_DbgAssert( stacks_[i].tip == stacks_[i].datastart );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cudaFree( mem_ );
|
||||||
|
|
||||||
|
mem_ = 0;
|
||||||
|
initialized_ = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryStack* MemoryPool::getFreeMemStack()
|
||||||
|
{
|
||||||
|
AutoLock lock(mtx_);
|
||||||
|
|
||||||
|
if (!initialized_)
|
||||||
|
initilizeImpl();
|
||||||
|
|
||||||
|
if (!mem_)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < stackCount_; ++i)
|
||||||
|
{
|
||||||
|
if (stacks_[i].isFree)
|
||||||
|
{
|
||||||
|
stacks_[i].isFree = false;
|
||||||
|
return &stacks_[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryPool::returnMemStack(MemoryStack* memStack)
|
||||||
|
{
|
||||||
|
AutoLock lock(mtx_);
|
||||||
|
|
||||||
|
CV_DbgAssert( !memStack->isFree );
|
||||||
|
|
||||||
|
#if defined(DEBUG) || defined(_DEBUG)
|
||||||
|
bool found = false;
|
||||||
|
for (int i = 0; i < stackCount_; ++i)
|
||||||
|
{
|
||||||
|
if (memStack == &stacks_[i])
|
||||||
|
{
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CV_DbgAssert( found );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
CV_DbgAssert( memStack->tip == memStack->datastart );
|
||||||
|
|
||||||
|
memStack->isFree = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
/// MemoryPoolManager
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
class MemoryPoolManager
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
MemoryPoolManager();
|
||||||
|
~MemoryPoolManager();
|
||||||
|
|
||||||
|
MemoryPool* getPool(int deviceId);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<MemoryPool> pools_;
|
||||||
|
};
|
||||||
|
|
||||||
|
MemoryPoolManager::MemoryPoolManager()
|
||||||
|
{
|
||||||
|
int deviceCount = getCudaEnabledDeviceCount();
|
||||||
|
|
||||||
|
if (deviceCount > 0)
|
||||||
|
pools_.resize(deviceCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryPoolManager::~MemoryPoolManager()
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < pools_.size(); ++i)
|
||||||
|
{
|
||||||
|
cudaSetDevice(i);
|
||||||
|
pools_[i].release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryPool* MemoryPoolManager::getPool(int deviceId)
|
||||||
|
{
|
||||||
|
CV_DbgAssert( deviceId >= 0 && deviceId < static_cast<int>(pools_.size()) );
|
||||||
|
return &pools_[deviceId];
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryPool* memPool(int deviceId)
|
||||||
|
{
|
||||||
|
static MemoryPoolManager manager;
|
||||||
|
return manager.getPool(deviceId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
/// StackAllocator
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
bool enableMemoryPool = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0)
|
||||||
|
{
|
||||||
|
if (enableMemoryPool)
|
||||||
|
{
|
||||||
|
const int deviceId = getDevice();
|
||||||
|
memStack_ = memPool(deviceId)->getFreeMemStack();
|
||||||
|
|
||||||
|
DeviceInfo devInfo(deviceId);
|
||||||
|
alignment_ = devInfo.textureAlignment();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::cuda::StackAllocator::~StackAllocator()
|
||||||
|
{
|
||||||
|
cudaStreamSynchronize(stream_);
|
||||||
|
|
||||||
|
if (memStack_ != 0)
|
||||||
|
memStack_->pool->returnMemStack(memStack_);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
size_t alignUp(size_t what, size_t alignment)
|
||||||
|
{
|
||||||
|
size_t alignMask = alignment-1;
|
||||||
|
size_t inverseAlignMask = ~alignMask;
|
||||||
|
size_t res = (what + alignMask) & inverseAlignMask;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool cv::cuda::StackAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
|
||||||
|
{
|
||||||
|
if (memStack_ == 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
size_t pitch, memSize;
|
||||||
|
|
||||||
|
if (rows > 1 && cols > 1)
|
||||||
|
{
|
||||||
|
pitch = alignUp(cols * elemSize, alignment_);
|
||||||
|
memSize = pitch * rows;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Single row or single column must be continuous
|
||||||
|
pitch = elemSize * cols;
|
||||||
|
memSize = alignUp(elemSize * cols * rows, 64);
|
||||||
|
}
|
||||||
|
|
||||||
|
uchar* ptr = memStack_->requestMemory(memSize);
|
||||||
|
|
||||||
|
if (ptr == 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
mat->data = ptr;
|
||||||
|
mat->step = pitch;
|
||||||
|
mat->refcount = (int*) fastMalloc(sizeof(int));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::cuda::StackAllocator::free(GpuMat* mat)
|
||||||
|
{
|
||||||
|
if (memStack_ == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
memStack_->returnMemory(mat->datastart);
|
||||||
|
fastFree(mat->refcount);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::cuda::setBufferPoolUsage(bool on)
|
||||||
|
{
|
||||||
|
enableMemoryPool = on;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::cuda::setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount)
|
||||||
|
{
|
||||||
|
const int currentDevice = getDevice();
|
||||||
|
|
||||||
|
if (deviceId >= 0)
|
||||||
|
{
|
||||||
|
setDevice(deviceId);
|
||||||
|
memPool(deviceId)->initialize(stackSize, stackCount);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int deviceCount = getCudaEnabledDeviceCount();
|
||||||
|
|
||||||
|
for (deviceId = 0; deviceId < deviceCount; ++deviceId)
|
||||||
|
{
|
||||||
|
setDevice(deviceId);
|
||||||
|
memPool(deviceId)->initialize(stackSize, stackCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
setDevice(currentDevice);
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
/// BufferPool
|
||||||
|
|
||||||
|
GpuMat cv::cuda::BufferPool::getBuffer(int rows, int cols, int type)
|
||||||
|
{
|
||||||
|
GpuMat buf(allocator_);
|
||||||
|
buf.create(rows, cols, type);
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -49,7 +49,8 @@ using namespace cv::cuda;
|
|||||||
cv::cuda::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
|
cv::cuda::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
|
||||||
flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
|
flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
|
||||||
step(step_), data((uchar*)data_), refcount(0),
|
step(step_), data((uchar*)data_), refcount(0),
|
||||||
datastart((uchar*)data_), dataend((uchar*)data_)
|
datastart((uchar*)data_), dataend((uchar*)data_),
|
||||||
|
allocator(defaultAllocator())
|
||||||
{
|
{
|
||||||
size_t minstep = cols * elemSize();
|
size_t minstep = cols * elemSize();
|
||||||
|
|
||||||
@ -74,7 +75,8 @@ cv::cuda::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t st
|
|||||||
cv::cuda::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
|
cv::cuda::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
|
||||||
flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width),
|
flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width),
|
||||||
step(step_), data((uchar*)data_), refcount(0),
|
step(step_), data((uchar*)data_), refcount(0),
|
||||||
datastart((uchar*)data_), dataend((uchar*)data_)
|
datastart((uchar*)data_), dataend((uchar*)data_),
|
||||||
|
allocator(defaultAllocator())
|
||||||
{
|
{
|
||||||
size_t minstep = cols * elemSize();
|
size_t minstep = cols * elemSize();
|
||||||
|
|
||||||
@ -92,6 +94,7 @@ cv::cuda::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
|
|||||||
|
|
||||||
flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
|
flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
dataend += step * (rows - 1) + minstep;
|
dataend += step * (rows - 1) + minstep;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,6 +103,7 @@ cv::cuda::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
|
|||||||
flags = m.flags;
|
flags = m.flags;
|
||||||
step = m.step; refcount = m.refcount;
|
step = m.step; refcount = m.refcount;
|
||||||
data = m.data; datastart = m.datastart; dataend = m.dataend;
|
data = m.data; datastart = m.datastart; dataend = m.dataend;
|
||||||
|
allocator = m.allocator;
|
||||||
|
|
||||||
if (rowRange_ == Range::all())
|
if (rowRange_ == Range::all())
|
||||||
{
|
{
|
||||||
@ -139,7 +143,8 @@ cv::cuda::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
|
|||||||
cv::cuda::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
|
cv::cuda::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
|
||||||
flags(m.flags), rows(roi.height), cols(roi.width),
|
flags(m.flags), rows(roi.height), cols(roi.width),
|
||||||
step(m.step), data(m.data + roi.y*step), refcount(m.refcount),
|
step(m.step), data(m.data + roi.y*step), refcount(m.refcount),
|
||||||
datastart(m.datastart), dataend(m.dataend)
|
datastart(m.datastart), dataend(m.dataend),
|
||||||
|
allocator(m.allocator)
|
||||||
{
|
{
|
||||||
flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
|
flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
|
||||||
data += roi.x * elemSize();
|
data += roi.x * elemSize();
|
||||||
@ -347,6 +352,17 @@ GpuMat cv::cuda::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
|
|||||||
|
|
||||||
#ifndef HAVE_CUDA
|
#ifndef HAVE_CUDA
|
||||||
|
|
||||||
|
GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator)
|
||||||
|
{
|
||||||
|
(void) allocator;
|
||||||
|
throw_no_cuda();
|
||||||
|
}
|
||||||
|
|
||||||
void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
|
void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
|
||||||
{
|
{
|
||||||
(void) _rows;
|
(void) _rows;
|
||||||
|
@ -66,6 +66,7 @@ class cv::cuda::Stream::Impl
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
cudaStream_t stream;
|
cudaStream_t stream;
|
||||||
|
Ptr<StackAllocator> stackAllocator_;
|
||||||
|
|
||||||
Impl();
|
Impl();
|
||||||
Impl(cudaStream_t stream);
|
Impl(cudaStream_t stream);
|
||||||
@ -73,17 +74,26 @@ public:
|
|||||||
~Impl();
|
~Impl();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
cv::cuda::BufferPool::BufferPool(Stream& stream) : allocator_(stream.impl_->stackAllocator_.get())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
cv::cuda::Stream::Impl::Impl() : stream(0)
|
cv::cuda::Stream::Impl::Impl() : stream(0)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaStreamCreate(&stream) );
|
cudaSafeCall( cudaStreamCreate(&stream) );
|
||||||
|
|
||||||
|
stackAllocator_ = makePtr<StackAllocator>(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::cuda::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
|
cv::cuda::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
|
||||||
{
|
{
|
||||||
|
stackAllocator_ = makePtr<StackAllocator>(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::cuda::Stream::Impl::~Impl()
|
cv::cuda::Stream::Impl::~Impl()
|
||||||
{
|
{
|
||||||
|
stackAllocator_.release();
|
||||||
|
|
||||||
if (stream)
|
if (stream)
|
||||||
cudaStreamDestroy(stream);
|
cudaStreamDestroy(stream);
|
||||||
}
|
}
|
||||||
@ -197,7 +207,7 @@ cv::cuda::Stream::operator bool_type() const
|
|||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
// Stream
|
// Event
|
||||||
|
|
||||||
#ifndef HAVE_CUDA
|
#ifndef HAVE_CUDA
|
||||||
|
|
||||||
|
114
modules/cuda/perf/perf_buffer_pool.cpp
Normal file
114
modules/cuda/perf/perf_buffer_pool.cpp
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "perf_precomp.hpp"
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
|
||||||
|
#include "opencv2/cudaarithm.hpp"
|
||||||
|
#include "opencv2/core/private.cuda.hpp"
|
||||||
|
|
||||||
|
using namespace testing;
|
||||||
|
using namespace perf;
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::cuda;
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||||
|
{
|
||||||
|
BufferPool pool(stream);
|
||||||
|
|
||||||
|
GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels()));
|
||||||
|
|
||||||
|
src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
|
||||||
|
|
||||||
|
cuda::exp(buf, dst, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
void func2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||||
|
{
|
||||||
|
BufferPool pool(stream);
|
||||||
|
|
||||||
|
GpuMat buf1 = pool.getBuffer(src1.size(), CV_32FC(src1.channels()));
|
||||||
|
|
||||||
|
func1(src1, buf1, stream);
|
||||||
|
|
||||||
|
GpuMat buf2 = pool.getBuffer(src2.size(), CV_32FC(src2.channels()));
|
||||||
|
|
||||||
|
func1(src2, buf2, stream);
|
||||||
|
|
||||||
|
cuda::add(buf1, buf2, dst, noArray(), -1, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PERF_TEST_P(Sz, BufferPool, CUDA_TYPICAL_MAT_SIZES)
|
||||||
|
{
|
||||||
|
static bool first = true;
|
||||||
|
|
||||||
|
const Size size = GetParam();
|
||||||
|
|
||||||
|
const bool useBufferPool = PERF_RUN_CUDA();
|
||||||
|
|
||||||
|
Mat host_src(size, CV_8UC1);
|
||||||
|
declare.in(host_src, WARMUP_RNG);
|
||||||
|
|
||||||
|
GpuMat src1(host_src), src2(host_src);
|
||||||
|
GpuMat dst;
|
||||||
|
|
||||||
|
setBufferPoolUsage(useBufferPool);
|
||||||
|
if (useBufferPool && first)
|
||||||
|
{
|
||||||
|
setBufferPoolConfig(-1, 25 * 1024 * 1024, 2);
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CYCLE()
|
||||||
|
{
|
||||||
|
func2(src1, src2, dst, Stream::Null());
|
||||||
|
}
|
||||||
|
|
||||||
|
Mat h_dst(dst);
|
||||||
|
SANITY_CHECK(h_dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
120
modules/cuda/test/test_buffer_pool.cpp
Normal file
120
modules/cuda/test/test_buffer_pool.cpp
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "test_precomp.hpp"
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
|
||||||
|
#include "opencv2/cudaarithm.hpp"
|
||||||
|
#include "opencv2/cudawarping.hpp"
|
||||||
|
#include "opencv2/core/private.cuda.hpp"
|
||||||
|
|
||||||
|
using namespace testing;
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::cuda;
|
||||||
|
|
||||||
|
struct BufferPoolTest : TestWithParam<DeviceInfo>
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||||
|
{
|
||||||
|
BufferPool pool(stream);
|
||||||
|
|
||||||
|
GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels()));
|
||||||
|
|
||||||
|
src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
|
||||||
|
|
||||||
|
cuda::exp(buf, dst, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
void func2(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||||
|
{
|
||||||
|
BufferPool pool(stream);
|
||||||
|
|
||||||
|
GpuMat buf1 = pool.getBuffer(saturate_cast<int>(src.rows * 0.5), saturate_cast<int>(src.cols * 0.5), src.type());
|
||||||
|
|
||||||
|
cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST, stream);
|
||||||
|
|
||||||
|
GpuMat buf2 = pool.getBuffer(buf1.size(), CV_32FC(buf1.channels()));
|
||||||
|
|
||||||
|
func1(buf1, buf2, stream);
|
||||||
|
|
||||||
|
GpuMat buf3 = pool.getBuffer(src.size(), buf2.type());
|
||||||
|
|
||||||
|
cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST, stream);
|
||||||
|
|
||||||
|
buf3.convertTo(dst, CV_8U, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CUDA_TEST_P(BufferPoolTest, SimpleUsage)
|
||||||
|
{
|
||||||
|
DeviceInfo devInfo = GetParam();
|
||||||
|
setDevice(devInfo.deviceID());
|
||||||
|
|
||||||
|
GpuMat src(200, 200, CV_8UC1);
|
||||||
|
GpuMat dst;
|
||||||
|
|
||||||
|
Stream stream;
|
||||||
|
|
||||||
|
func2(src, dst, stream);
|
||||||
|
|
||||||
|
stream.waitForCompletion();
|
||||||
|
|
||||||
|
GpuMat buf, buf1, buf2, buf3;
|
||||||
|
GpuMat dst_gold;
|
||||||
|
|
||||||
|
cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST);
|
||||||
|
buf1.convertTo(buf, CV_32F, 1.0 / 255.0);
|
||||||
|
cuda::exp(buf, buf2);
|
||||||
|
cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST);
|
||||||
|
buf3.convertTo(dst_gold, CV_8U);
|
||||||
|
|
||||||
|
ASSERT_MAT_NEAR(dst_gold, dst, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPoolTest, ALL_DEVICES);
|
||||||
|
|
||||||
|
#endif // HAVE_CUDA
|
Loading…
x
Reference in New Issue
Block a user