added implementation of MatPL with zero-copy and write-combined memory
This commit is contained in:
parent
024283ceae
commit
35ebeb21bd
@ -233,17 +233,18 @@ namespace cv
|
||||
{
|
||||
public:
|
||||
|
||||
//Not supported. Now behaviour is like ALLOC_DEFAULT.
|
||||
//enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }
|
||||
//Supported allocation types: page-locked (default), zero-copy (device-mapped) and write-combined host memory.
|
||||
enum { ALLOC_PAGE_LOCKED = 0, ALLOC_ZEROCOPY = 1, ALLOC_WRITE_COMBINED = 4 };
|
||||
|
||||
MatPL();
|
||||
MatPL(const MatPL& m);
|
||||
|
||||
MatPL(int _rows, int _cols, int _type);
|
||||
MatPL(Size _size, int _type);
|
||||
MatPL(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
|
||||
MatPL(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
|
||||
|
||||
|
||||
//! creates from cv::Mat with copying data
|
||||
explicit MatPL(const Mat& m);
|
||||
explicit MatPL(const Mat& m, int type_alloc = ALLOC_PAGE_LOCKED);
|
||||
|
||||
~MatPL();
|
||||
|
||||
@ -253,8 +254,8 @@ namespace cv
|
||||
MatPL clone() const;
|
||||
|
||||
//! allocates new matrix data unless the matrix already has specified size and type.
|
||||
void create(int _rows, int _cols, int _type);
|
||||
void create(Size _size, int _type);
|
||||
void create(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
|
||||
void create(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
|
||||
|
||||
//! decrements reference counter and releases memory if needed.
|
||||
void release();
|
||||
@ -263,6 +264,11 @@ namespace cv
|
||||
Mat createMatHeader() const;
|
||||
operator Mat() const;
|
||||
|
||||
operator GpuMat() const;
|
||||
|
||||
static bool can_device_map_to_host();
|
||||
|
||||
|
||||
// Please see cv::Mat for descriptions
|
||||
bool isContinuous() const;
|
||||
size_t elemSize() const;
|
||||
@ -274,16 +280,20 @@ namespace cv
|
||||
Size size() const;
|
||||
bool empty() const;
|
||||
|
||||
|
||||
// Please see cv::Mat for descriptions
|
||||
int flags;
|
||||
int rows, cols;
|
||||
size_t step;
|
||||
|
||||
int alloc_type;
|
||||
|
||||
uchar* data;
|
||||
int* refcount;
|
||||
|
||||
uchar* datastart;
|
||||
uchar* dataend;
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////// CudaStream ////////////////////////////////
|
||||
@ -332,7 +342,7 @@ namespace cv
|
||||
|
||||
CV_EXPORTS void remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst);
|
||||
|
||||
|
||||
|
||||
CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
|
||||
|
||||
//////////////////////////////// StereoBM_GPU ////////////////////////////////
|
||||
@ -374,9 +384,9 @@ namespace cv
|
||||
private:
|
||||
GpuMat minSSD, leBuf, riBuf;
|
||||
};
|
||||
|
||||
|
||||
////////////////////////// StereoBeliefPropagation ///////////////////////////
|
||||
|
||||
|
||||
class CV_EXPORTS StereoBeliefPropagation
|
||||
{
|
||||
public:
|
||||
@ -385,15 +395,15 @@ namespace cv
|
||||
enum { DEFAULT_LEVELS = 5 };
|
||||
|
||||
//! the default constructor
|
||||
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
|
||||
int iters = DEFAULT_ITERS,
|
||||
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
|
||||
int iters = DEFAULT_ITERS,
|
||||
int levels = DEFAULT_LEVELS,
|
||||
int msg_type = CV_32F);
|
||||
|
||||
//! the full constructor taking the number of disparities, number of BP iterations on each level,
|
||||
//! number of levels, truncation of data cost, data weight,
|
||||
//! number of levels, truncation of data cost, data weight,
|
||||
//! truncation of discontinuity cost and discontinuity single jump
|
||||
StereoBeliefPropagation(int ndisp, int iters, int levels,
|
||||
StereoBeliefPropagation(int ndisp, int iters, int levels,
|
||||
float max_data_term, float data_weight,
|
||||
float max_disc_term, float disc_single_jump,
|
||||
int msg_type = CV_32F);
|
||||
@ -401,29 +411,29 @@ namespace cv
|
||||
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
|
||||
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
|
||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);
|
||||
|
||||
|
||||
//! Async version
|
||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream);
|
||||
|
||||
|
||||
int ndisp;
|
||||
|
||||
int iters;
|
||||
int levels;
|
||||
|
||||
float max_data_term;
|
||||
|
||||
float max_data_term;
|
||||
float data_weight;
|
||||
float max_disc_term;
|
||||
float max_disc_term;
|
||||
float disc_single_jump;
|
||||
|
||||
|
||||
int msg_type;
|
||||
private:
|
||||
GpuMat u, d, l, r, u2, d2, l2, r2;
|
||||
std::vector<GpuMat> datas;
|
||||
std::vector<GpuMat> datas;
|
||||
GpuMat out;
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////// StereoConstantSpaceBP ///////////////////////////
|
||||
|
||||
|
||||
class CV_EXPORTS StereoConstantSpaceBP
|
||||
{
|
||||
public:
|
||||
@ -434,13 +444,13 @@ namespace cv
|
||||
|
||||
//! the default constructor
|
||||
explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP,
|
||||
int iters = DEFAULT_ITERS,
|
||||
int levels = DEFAULT_LEVELS,
|
||||
int iters = DEFAULT_ITERS,
|
||||
int levels = DEFAULT_LEVELS,
|
||||
int nr_plane = DEFAULT_NR_PLANE,
|
||||
int msg_type = CV_32F);
|
||||
|
||||
//! the full constructor taking the number of disparities, number of BP iterations on each level,
|
||||
//! number of levels, number of active disparity on the first level, truncation of data cost, data weight,
|
||||
//! number of levels, number of active disparity on the first level, truncation of data cost, data weight,
|
||||
//! truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold
|
||||
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
|
||||
float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
|
||||
@ -450,20 +460,20 @@ namespace cv
|
||||
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
|
||||
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
|
||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);
|
||||
|
||||
|
||||
//! Async version
|
||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream);
|
||||
|
||||
|
||||
int ndisp;
|
||||
|
||||
int iters;
|
||||
int levels;
|
||||
|
||||
|
||||
int nr_plane;
|
||||
|
||||
float max_data_term;
|
||||
|
||||
float max_data_term;
|
||||
float data_weight;
|
||||
float max_disc_term;
|
||||
float max_disc_term;
|
||||
float disc_single_jump;
|
||||
|
||||
int min_disp_th;
|
||||
@ -483,7 +493,7 @@ namespace cv
|
||||
}
|
||||
|
||||
//! Speckle filtering - filters small connected components on disparity image.
|
||||
//! It sets pixel (x,y) to newVal if it corresponds to small CC with size < maxSpeckleSize.
|
||||
//! It sets pixel (x,y) to newVal if it corresponds to small CC with size < maxSpeckleSize.
|
||||
//! Threshold for border between CC is diffThreshold;
|
||||
void filterSpeckles( Mat& img, uchar newVal, int maxSpeckleSize, uchar diffThreshold, Mat& buf);
|
||||
|
||||
|
@ -343,29 +343,28 @@ static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
//! Default constructor: creates an empty matrix header with no allocated data.
//! alloc_type is initialized explicitly (in member declaration order) so that
//! later reads of it, e.g. in operator GpuMat(), never see an indeterminate value.
inline MatPL::MatPL()
    : flags(0), rows(0), cols(0), step(0), alloc_type(ALLOC_PAGE_LOCKED),
      data(0), refcount(0), datastart(0), dataend(0)
{
}
|
||||
inline MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
||||
//! Constructs a matrix of _rows x _cols elements of type _type, allocated
//! according to type_alloc (one of the ALLOC_* enum values).
//! If either dimension is not positive, no memory is allocated and the header
//! stays empty; alloc_type is still initialized so it is never read uninitialized.
inline MatPL::MatPL(int _rows, int _cols, int _type, int type_alloc)
    : flags(0), rows(0), cols(0), step(0), alloc_type(type_alloc),
      data(0), refcount(0), datastart(0), dataend(0)
{
    if( _rows > 0 && _cols > 0 )
        create( _rows, _cols, _type, type_alloc );
}
|
||||
|
||||
inline MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
||||
//! Constructs a matrix of the given size and element type, allocated
//! according to type_alloc (one of the ALLOC_* enum values).
//! If either dimension is not positive, no memory is allocated and the header
//! stays empty; alloc_type is still initialized so it is never read uninitialized.
inline MatPL::MatPL(Size _size, int _type, int type_alloc)
    : flags(0), rows(0), cols(0), step(0), alloc_type(type_alloc),
      data(0), refcount(0), datastart(0), dataend(0)
{
    if( _size.height > 0 && _size.width > 0 )
        create( _size.height, _size.width, _type, type_alloc );
}
|
||||
|
||||
//! Copy constructor: creates a new header that shares m's data and
//! increments the shared reference counter (no pixel data is copied).
//! datastart, dataend and alloc_type are copied as well, matching what
//! operator= does, so the copy describes the same allocation as m.
inline MatPL::MatPL(const MatPL& m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), alloc_type(m.alloc_type),
      data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
{
    // A null refcount means m owns no allocation; nothing to share.
    if( refcount )
        CV_XADD(refcount, 1);
}
|
||||
|
||||
inline MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
||||
inline MatPL::MatPL(const Mat& m, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
||||
{
|
||||
if( m.rows > 0 && m.cols > 0 )
|
||||
create( m.size(), m.type() );
|
||||
create( m.size(), m.type() , type_alloc);
|
||||
|
||||
Mat tmp = createMatHeader();
|
||||
m.copyTo(tmp);
|
||||
@ -375,6 +374,7 @@ inline MatPL::~MatPL()
|
||||
{
|
||||
release();
|
||||
}
|
||||
|
||||
inline MatPL& MatPL::operator = (const MatPL& m)
|
||||
{
|
||||
if( this != &m )
|
||||
@ -388,6 +388,7 @@ inline MatPL& MatPL::operator = (const MatPL& m)
|
||||
datastart = m.datastart;
|
||||
dataend = m.dataend;
|
||||
refcount = m.refcount;
|
||||
alloc_type = m.alloc_type;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
@ -401,7 +402,7 @@ inline MatPL MatPL::clone() const
|
||||
return m;
|
||||
}
|
||||
|
||||
inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); }
|
||||
//! Size-based overload: unpacks height/width from _size and forwards to
//! create(rows, cols, type, type_alloc).
inline void MatPL::create(Size _size, int _type, int type_alloc)
{
    const int height = _size.height;
    const int width  = _size.width;
    create(height, width, _type, type_alloc);
}
|
||||
//CCP void MatPL::create(int _rows, int _cols, int _type);
|
||||
//CPP void MatPL::release();
|
||||
|
||||
|
@ -67,7 +67,8 @@ namespace cv
|
||||
void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
|
||||
void GpuMat::release() { throw_nogpu(); }
|
||||
|
||||
void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
|
||||
void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
|
||||
// Stub for builds without GPU support. The class declares
// `static bool can_device_map_to_host()`; the stub must use that name and
// return type so the declared member has a definition in this configuration.
// The trailing return only satisfies the compiler in case throw_nogpu() is
// not marked as non-returning.
bool MatPL::can_device_map_to_host() { throw_nogpu(); return false; }
|
||||
void MatPL::release() { throw_nogpu(); }
|
||||
}
|
||||
|
||||
@ -164,7 +165,7 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
|
||||
else
|
||||
impl::set_to_with_mask( *this, depth(), s.val, mask, channels());
|
||||
|
||||
return *this;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
@ -209,6 +210,15 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
|
||||
return hdr;
|
||||
}
|
||||
|
||||
bool cv::gpu::MatPL::can_device_map_to_host()
|
||||
{
|
||||
cudaDeviceProp prop;
|
||||
cudaGetDeviceProperties(&prop, 0);
|
||||
|
||||
return (prop.canMapHostMemory != 0) ? true : false;
|
||||
}
|
||||
|
||||
|
||||
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
|
||||
{
|
||||
_type &= TYPE_MASK;
|
||||
@ -259,8 +269,9 @@ void cv::gpu::GpuMat::release()
|
||||
//////////////////////////////// MatPL ////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
|
||||
void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
|
||||
{
|
||||
alloc_type = type_alloc;
|
||||
_type &= TYPE_MASK;
|
||||
if( rows == _rows && cols == _cols && type() == _type && data )
|
||||
return;
|
||||
@ -281,7 +292,24 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
|
||||
|
||||
//datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
|
||||
void *ptr;
|
||||
cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );
|
||||
|
||||
switch (type_alloc)
|
||||
{
|
||||
case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
|
||||
case ALLOC_ZEROCOPY:
|
||||
if (can_device_map_to_host() == true)
|
||||
{
|
||||
cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) );
|
||||
}
|
||||
else
|
||||
cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
|
||||
break;
|
||||
|
||||
case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
|
||||
|
||||
default:
|
||||
cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
|
||||
}
|
||||
|
||||
datastart = data = (uchar*)ptr;
|
||||
dataend = data + nettosize;
|
||||
@ -291,6 +319,19 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
|
||||
}
|
||||
}
|
||||
|
||||
//! Converts to a GpuMat header that refers to this matrix's memory through
//! its device-side address. Only valid when the memory was allocated with
//! ALLOC_ZEROCOPY; otherwise an error is reported.
//! No data is copied: the returned GpuMat uses the same rows, cols, type and step.
// NOTE(review): this definition is marked `inline` but appears to live in a
// source file next to the non-inline MatPL::create — confirm it is visible to
// every translation unit that uses the conversion.
inline MatPL::operator GpuMat() const
{
    if (alloc_type == ALLOC_ZEROCOPY)
    {
        // cudaHostGetDevicePointer writes the device pointer through its first
        // argument, so it must be given the address of a real void* variable.
        void* pdev = 0;
        cudaSafeCall( cudaHostGetDevicePointer( &pdev, this->data, 0 ) );
        return GpuMat(this->rows, this->cols, this->type(), pdev, this->step);
    }

    cv::gpu::error("Zero-copy is not supported or memory was allocated without zero-copy flag", __FILE__, __LINE__);

    // Reached only if error() returns; avoids falling off the end of a
    // value-returning function.
    return GpuMat();
}
|
||||
|
||||
void cv::gpu::MatPL::release()
|
||||
{
|
||||
if( refcount && CV_XADD(refcount, -1) == 1 )
|
||||
|
Loading…
x
Reference in New Issue
Block a user