fixed bugs in page-locked memory allocation
avoid extra GPU memory allocation in BP and CSBP
@@ -234,7 +234,7 @@ namespace
         if (disp.empty())
             disp.create(rows, cols, CV_16S);
 
-        out = ((disp.type() == CV_16S) ? disp : GpuMat(rows, cols, CV_16S));
+        out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
         out = zero;
 
         bp::output(rthis.msg_type, u, d, l, r, datas.front(), disp, stream);
@@ -251,7 +251,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
     if (disp.empty())
         disp.create(rows, cols, CV_16S);
 
-    out = ((disp.type() == CV_16S) ? disp : GpuMat(rows, cols, CV_16S));
+    out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
     out = zero;
 
     csbp::compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
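Both hunks above make the same change: the temporary GpuMat(rows, cols, CV_16S) is replaced by the comma expression (out.create(rows, cols, CV_16S), out). Because create reallocates only when the requested size or type differs from what the matrix already holds, the expression reuses out's existing device buffer on repeated calls instead of allocating a fresh matrix each time, which is the "avoid extra GPU memory allocation" half of the commit. A minimal host-side sketch of the pattern, using an illustrative DummyMat stand-in (not the real GpuMat API):

    #include <cstdio>

    // Illustrative stand-in for cv::gpu::GpuMat: create() only reallocates
    // when the requested shape or type differs from what is already held.
    struct DummyMat
    {
        int rows = 0, cols = 0, type = -1;
        int allocations = 0; // counts real (re)allocations

        void create(int r, int c, int t)
        {
            if (rows == r && cols == c && type == t)
                return;        // already matches: no new allocation
            rows = r; cols = c; type = t;
            ++allocations;     // the real class would cudaMalloc here
        }
    };

    int main()
    {
        const int CV_16S_LIKE = 3; // arbitrary type tag for the sketch
        DummyMat disp, out;

        for (int iter = 0; iter < 3; ++iter)
        {
            // The old code built a brand-new temporary every iteration; the
            // new comma expression calls create() (a no-op after the first
            // pass) and then yields out itself.
            DummyMat& result = (disp.type == CV_16S_LIKE)
                                   ? disp
                                   : (out.create(480, 640, CV_16S_LIKE), out);
            (void)result;
        }

        std::printf("allocations for out: %d\n", out.allocations); // prints 1
        return 0;
    }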
@@ -57,8 +57,8 @@ Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); re
 bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; }
 void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
 void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }
-void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); }
-void cv::gpu::Stream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, CudaMem& /*dst*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueUpload(const CudaMem& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
 void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
 void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
 void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
@@ -150,9 +150,9 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
     CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() )
     devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
 }
-void cv::gpu::Stream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
+void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
 
-void cv::gpu::Stream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
+void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
 void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
 void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
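devcopy itself is not part of this diff; presumably it enqueues an asynchronous strided copy on the stream held in impl. Such copies overlap with other work only when the host-side buffer is page-locked, which is why these overloads take CudaMem rather than a plain Mat. A hedged sketch of what such a helper could look like (the name devcopy2D and the error handling are assumptions, not the library's code):

    #include <cuda_runtime.h>
    #include <stdexcept>

    // Hypothetical stand-in for the devcopy() helper used above: a strided
    // 2D copy enqueued on a stream. The call returns immediately; the copy
    // runs truly asynchronously only if the host buffer is page-locked.
    static void devcopy2D(void* dst, size_t dstPitch,
                          const void* src, size_t srcPitch,
                          size_t widthBytes, size_t height,
                          cudaMemcpyKind kind, cudaStream_t stream)
    {
        cudaError_t err = cudaMemcpy2DAsync(dst, dstPitch, src, srcPitch,
                                            widthBytes, height, kind, stream);
        if (err != cudaSuccess) // the real code routes errors through cudaSafeCall
            throw std::runtime_error(cudaGetErrorString(err));
    }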
@@ -67,9 +67,9 @@ namespace cv
         void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
         void GpuMat::release() { throw_nogpu(); }
 
-        void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
-        bool MatPL::can_device_map_to_host() { throw_nogpu(); return false; }
-        void MatPL::release() { throw_nogpu(); }
+        void CudaMem::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
+        bool CudaMem::can_device_map_to_host() { throw_nogpu(); return false; }
+        void CudaMem::release() { throw_nogpu(); }
     }
 
 }
@@ -83,7 +83,7 @@ void cv::gpu::GpuMat::upload(const Mat& m)
     cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
 }
 
-void cv::gpu::GpuMat::upload(const MatPL& m, Stream& stream)
+void cv::gpu::GpuMat::upload(const CudaMem& m, Stream& stream)
 {
     CV_DbgAssert(!m.empty());
     stream.enqueueUpload(m, *this);
@@ -96,7 +96,7 @@ void cv::gpu::GpuMat::download(cv::Mat& m) const
     cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
 }
 
-void cv::gpu::GpuMat::download(MatPL& m, Stream& stream) const
+void cv::gpu::GpuMat::download(CudaMem& m, Stream& stream) const
 {
     CV_DbgAssert(!m.empty());
     stream.enqueueDownload(*this, m);
@@ -210,15 +210,6 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
     return hdr;
 }
 
-bool cv::gpu::MatPL::can_device_map_to_host()
-{
-    cudaDeviceProp prop;
-    cudaGetDeviceProperties(&prop, 0);
-
-    return (prop.canMapHostMemory != 0) ? true : false;
-}
-
-
 void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
 {
     _type &= TYPE_MASK;
@@ -266,12 +257,21 @@ void cv::gpu::GpuMat::release()
 
 ///////////////////////////////////////////////////////////////////////
-//////////////////////////////// MatPL ////////////////////////////////
+//////////////////////////////// CudaMem //////////////////////////////
 ///////////////////////////////////////////////////////////////////////
 
-void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
+bool cv::gpu::CudaMem::can_device_map_to_host()
 {
-    alloc_type = type_alloc;
+    cudaDeviceProp prop;
+    cudaGetDeviceProperties(&prop, 0);
+    return (prop.canMapHostMemory != 0) ? true : false;
+}
+
+void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
+{
+    if (_alloc_type == ALLOC_ZEROCOPY && !can_device_map_to_host())
+        cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
+
     _type &= TYPE_MASK;
     if( rows == _rows && cols == _cols && type() == _type && data )
         return;
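The capability check that used to live inside the allocation switch is now performed once, up front, before any allocation work starts. For reference, a standalone sketch of the same query; the main harness is illustrative, and note that on CUDA runtimes of this era mapped allocations also require cudaSetDeviceFlags(cudaDeviceMapHost) before the first mapped allocation:

    #include <cuda_runtime.h>
    #include <cstdio>

    // True when the device can map page-locked host memory into its own
    // address space, which is the precondition for ALLOC_ZEROCOPY.
    static bool deviceCanMapHostMemory(int device = 0)
    {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, device) != cudaSuccess)
            return false;
        return prop.canMapHostMemory != 0;
    }

    int main()
    {
        if (!deviceCanMapHostMemory())
        {
            std::puts("ZeroCopy is not supported by current device");
            return 1;
        }
        // Must be set before the first mapped allocation is made.
        cudaSetDeviceFlags(cudaDeviceMapHost);
        std::puts("zero-copy supported");
        return 0;
    }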
@@ -279,7 +279,7 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
     release();
     CV_DbgAssert( _rows >= 0 && _cols >= 0 );
     if( _rows > 0 && _cols > 0 )
-    {
+    {
         flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;
         rows = _rows;
         cols = _cols;
@@ -291,24 +291,15 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
         size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));
 
         //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
+        alloc_type = _alloc_type;
         void *ptr;
 
-        switch (type_alloc)
+        switch (alloc_type)
         {
-        case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
-        case ALLOC_ZEROCOPY:
-            if (can_device_map_to_host() == true)
-            {
-                cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) );
-            }
-            else
-                cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
-            break;
+        case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
+        case ALLOC_ZEROCOPY: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
         case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
 
-        default:
-            cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
+        default: cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
         }
 
         datastart = data = (uchar*)ptr;
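The three cudaHostAlloc flags in the switch trade off differently: cudaHostAllocDefault gives plain pinned memory for fast DMA transfers, cudaHostAllocMapped additionally makes the buffer visible to the device (zero-copy), and cudaHostAllocWriteCombined speeds up host writes at the cost of very slow host reads. A standalone sketch exercising all three, independent of OpenCV (buffer size and helper name are illustrative):

    #include <cuda_runtime.h>
    #include <cstdio>
    #include <cstdlib>

    // Allocates page-locked host memory, mirroring the ALLOC_PAGE_LOCKED /
    // ALLOC_ZEROCOPY / ALLOC_WRITE_COMBINED cases in the switch above.
    static void* hostAllocOrDie(size_t bytes, unsigned int flags)
    {
        void* ptr = nullptr;
        cudaError_t err = cudaHostAlloc(&ptr, bytes, flags);
        if (err != cudaSuccess)
        {
            std::fprintf(stderr, "cudaHostAlloc failed: %s\n",
                         cudaGetErrorString(err));
            std::exit(1);
        }
        return ptr;
    }

    int main()
    {
        const size_t bytes = 1 << 20; // 1 MB per buffer

        void* pinned   = hostAllocOrDie(bytes, cudaHostAllocDefault);       // fast DMA copies
        void* mapped   = hostAllocOrDie(bytes, cudaHostAllocMapped);        // device-visible
        void* combined = hostAllocOrDie(bytes, cudaHostAllocWriteCombined); // fast host writes

        cudaFreeHost(pinned);
        cudaFreeHost(mapped);
        cudaFreeHost(combined);
        return 0;
    }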
@@ -319,20 +310,22 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
     }
 }
 
-inline MatPL::operator GpuMat() const
+inline CudaMem::operator GpuMat() const
 {
+    GpuMat res;
     if (alloc_type == ALLOC_ZEROCOPY)
     {
-        void ** pdev;
-        cudaHostGetDevicePointer( pdev, this->data, 0 );
-        GpuMat m(this->rows, this->cols, this->type(), *pdev, this->step);
-        return m;
+        void *pdev;
+        cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
+        res = GpuMat(rows, cols, type(), pdev, step);
     }
     else
-        cv::gpu::error("", __FILE__, __LINE__);
+        cv::gpu::error("Zero-copy is not supported or memory was allocated without zero-copy flag", __FILE__, __LINE__);
+
+    return res;
 }
 
-void cv::gpu::MatPL::release()
+void cv::gpu::CudaMem::release()
 {
     if( refcount && CV_XADD(refcount, -1) == 1 )
     {
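This hunk fixes the worst of the page-locked bugs: the old conversion passed an uninitialized void** pdev straight to cudaHostGetDevicePointer, so the device pointer was written through a garbage address and then dereferenced. The fix takes the address of a local void* and checks the call's result. A standalone sketch of the corrected pattern (error handling spelled out where the commit uses cudaSafeCall):

    #include <cuda_runtime.h>
    #include <cstdio>

    int main()
    {
        // Mapped (zero-copy) allocations need this flag set up front.
        cudaSetDeviceFlags(cudaDeviceMapHost);

        void* host = nullptr;
        if (cudaHostAlloc(&host, 1 << 20, cudaHostAllocMapped) != cudaSuccess)
        {
            std::puts("mapped allocation failed");
            return 1;
        }

        // Correct pattern: pass the address of an initialized void*,
        // not an uninitialized void** as the pre-fix code did.
        void* pdev = nullptr;
        if (cudaHostGetDevicePointer(&pdev, host, 0) != cudaSuccess)
        {
            std::puts("host memory is not mapped into the device address space");
            cudaFreeHost(host);
            return 1;
        }

        std::printf("host %p is visible to the device as %p\n", host, pdev);
        cudaFreeHost(host);
        return 0;
    }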