Merge pull request #3531 from jet47:cuda-core-refactoring

Committed by Vadim Pisarevsky on 2014-12-26 12:12:42 +00:00
30 changed files with 487 additions and 441 deletions

View File

@@ -275,12 +275,12 @@ void cv::cuda::createContinuous(int rows, int cols, int type, OutputArray arr)
::createContinuousImpl(rows, cols, type, arr.getMatRef());
break;
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
::createContinuousImpl(rows, cols, type, arr.getGpuMatRef());
break;
case _InputArray::CUDA_MEM:
::createContinuousImpl(rows, cols, type, arr.getCudaMemRef());
case _InputArray::CUDA_HOST_MEM:
::createContinuousImpl(rows, cols, type, arr.getHostMemRef());
break;
default:
@@ -329,12 +329,12 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
::ensureSizeIsEnoughImpl(rows, cols, type, arr.getMatRef());
break;
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
::ensureSizeIsEnoughImpl(rows, cols, type, arr.getGpuMatRef());
break;
case _InputArray::CUDA_MEM:
::ensureSizeIsEnoughImpl(rows, cols, type, arr.getCudaMemRef());
case _InputArray::CUDA_HOST_MEM:
::ensureSizeIsEnoughImpl(rows, cols, type, arr.getHostMemRef());
break;
default:
@@ -342,14 +342,6 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
}
}
GpuMat cv::cuda::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
{
if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
return mat(Rect(0, 0, cols, rows));
return mat = GpuMat(rows, cols, type);
}
#ifndef HAVE_CUDA
GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
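
For reference, a minimal usage sketch of the two helpers this file touches (not part of the patch; it assumes a CUDA-enabled build, and the buffer sizes are arbitrary):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

int main()
{
    // createContinuous guarantees a GpuMat with no padding between rows,
    // which some CUDA kernels and interop paths require.
    cv::cuda::GpuMat d_buf;
    cv::cuda::createContinuous(480, 640, CV_8UC1, d_buf);

    // ensureSizeIsEnough only reallocates when the existing buffer is too
    // small; after this patch a pinned HostMem output is dispatched through
    // the CUDA_HOST_MEM kind and getHostMemRef(), but the call site is unchanged.
    cv::cuda::HostMem h_buf;
    cv::cuda::ensureSizeIsEnough(480, 640, CV_8UC1, h_buf);
    cv::cuda::ensureSizeIsEnough(240, 320, CV_8UC1, h_buf); // reuses the allocation
    return 0;
}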

View File

@@ -42,10 +42,124 @@
//M*/
#include "precomp.hpp"
#include <map>
using namespace cv;
using namespace cv::cuda;
#ifdef HAVE_CUDA
namespace {
class HostMemAllocator : public MatAllocator
{
public:
explicit HostMemAllocator(unsigned int flags) : flags_(flags)
{
}
UMatData* allocate(int dims, const int* sizes, int type,
void* data0, size_t* step,
int /*flags*/, UMatUsageFlags /*usageFlags*/) const
{
size_t total = CV_ELEM_SIZE(type);
for (int i = dims-1; i >= 0; i--)
{
if (step)
{
if (data0 && step[i] != CV_AUTOSTEP)
{
CV_Assert(total <= step[i]);
total = step[i];
}
else
{
step[i] = total;
}
}
total *= sizes[i];
}
UMatData* u = new UMatData(this);
u->size = total;
if (data0)
{
u->data = u->origdata = static_cast<uchar*>(data0);
u->flags |= UMatData::USER_ALLOCATED;
}
else
{
void* ptr = 0;
cudaSafeCall( cudaHostAlloc(&ptr, total, flags_) );
u->data = u->origdata = static_cast<uchar*>(ptr);
}
return u;
}
bool allocate(UMatData* u, int /*accessFlags*/, UMatUsageFlags /*usageFlags*/) const
{
return (u != NULL);
}
void deallocate(UMatData* u) const
{
CV_Assert(u->urefcount >= 0);
CV_Assert(u->refcount >= 0);
if (u && u->refcount == 0)
{
if ( !(u->flags & UMatData::USER_ALLOCATED) )
{
cudaFreeHost(u->origdata);
u->origdata = 0;
}
delete u;
}
}
private:
unsigned int flags_;
};
} // namespace
#endif
MatAllocator* cv::cuda::HostMem::getAllocator(AllocType alloc_type)
{
#ifndef HAVE_CUDA
(void) alloc_type;
throw_no_cuda();
return NULL;
#else
static std::map<unsigned int, Ptr<MatAllocator> > allocators;
unsigned int flag = cudaHostAllocDefault;
switch (alloc_type)
{
case PAGE_LOCKED: flag = cudaHostAllocDefault; break;
case SHARED: flag = cudaHostAllocMapped; break;
case WRITE_COMBINED: flag = cudaHostAllocWriteCombined; break;
default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
}
Ptr<MatAllocator>& a = allocators[flag];
if (a.empty())
{
a = makePtr<HostMemAllocator>(flag);
}
return a.get();
#endif
}
#ifdef HAVE_CUDA
namespace
{
@@ -59,7 +173,7 @@ namespace
}
#endif
void cv::cuda::CudaMem::create(int rows_, int cols_, int type_)
void cv::cuda::HostMem::create(int rows_, int cols_, int type_)
{
#ifndef HAVE_CUDA
(void) rows_;
@@ -123,9 +237,9 @@ void cv::cuda::CudaMem::create(int rows_, int cols_, int type_)
#endif
}
CudaMem cv::cuda::CudaMem::reshape(int new_cn, int new_rows) const
HostMem cv::cuda::HostMem::reshape(int new_cn, int new_rows) const
{
CudaMem hdr = *this;
HostMem hdr = *this;
int cn = channels();
if (new_cn == 0)
@@ -166,7 +280,7 @@ CudaMem cv::cuda::CudaMem::reshape(int new_cn, int new_rows) const
return hdr;
}
void cv::cuda::CudaMem::release()
void cv::cuda::HostMem::release()
{
#ifdef HAVE_CUDA
if (refcount && CV_XADD(refcount, -1) == 1)
@@ -181,7 +295,7 @@ void cv::cuda::CudaMem::release()
#endif
}
GpuMat cv::cuda::CudaMem::createGpuMatHeader() const
GpuMat cv::cuda::HostMem::createGpuMatHeader() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
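
The new HostMem::getAllocator() above makes pinned host memory available to plain cv::Mat. A hedged sketch of the intended usage (assumes a CUDA-enabled build; with HAVE_CUDA undefined the call throws):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

int main()
{
    // Route subsequent cv::Mat allocations through page-locked host memory,
    // so GpuMat::upload()/download() can use faster DMA transfers.
    cv::Mat::setDefaultAllocator(
        cv::cuda::HostMem::getAllocator(cv::cuda::HostMem::PAGE_LOCKED));

    cv::Mat frame(480, 640, CV_8UC3);   // backed by cudaHostAlloc'd memory
    cv::cuda::GpuMat d_frame;
    d_frame.upload(frame);              // copy from pinned memory to the device
    return 0;
}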

View File

@@ -1187,18 +1187,18 @@ Mat _InputArray::getMat(int i) const
return Mat();
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for cuda::GpuMat object");
return Mat();
}
if( k == CUDA_MEM )
if( k == CUDA_HOST_MEM )
{
CV_Assert( i < 0 );
const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj;
return cuda_mem->createMatHeader();
}
@@ -1391,15 +1391,15 @@ cuda::GpuMat _InputArray::getGpuMat() const
{
int k = kind();
if (k == GPU_MAT)
if (k == CUDA_GPU_MAT)
{
const cuda::GpuMat* d_mat = (const cuda::GpuMat*)obj;
return *d_mat;
}
if (k == CUDA_MEM)
if (k == CUDA_HOST_MEM)
{
const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj;
return cuda_mem->createGpuMatHeader();
}
@@ -1412,7 +1412,7 @@ cuda::GpuMat _InputArray::getGpuMat() const
if (k == NONE)
return cuda::GpuMat();
CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::CudaMem");
CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::HostMem");
return cuda::GpuMat();
}
@@ -1520,20 +1520,22 @@ Size _InputArray::size(int i) const
return buf->size();
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
const cuda::GpuMat* d_mat = (const cuda::GpuMat*)obj;
return d_mat->size();
}
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
if( k == CUDA_HOST_MEM )
{
CV_Assert( i < 0 );
const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj;
return cuda_mem->size();
}
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
return Size();
}
int _InputArray::sizend(int* arrsz, int i) const
@@ -1700,18 +1702,20 @@ int _InputArray::dims(int i) const
return 2;
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
return 2;
}
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
if( k == CUDA_HOST_MEM )
{
CV_Assert( i < 0 );
return 2;
}
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
return 0;
}
size_t _InputArray::total(int i) const
@@ -1799,12 +1803,14 @@ int _InputArray::type(int i) const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->type();
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
return ((const cuda::GpuMat*)obj)->type();
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
return ((const cuda::CudaMem*)obj)->type();
if( k == CUDA_HOST_MEM )
return ((const cuda::HostMem*)obj)->type();
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
return 0;
}
int _InputArray::depth(int i) const
@@ -1863,12 +1869,14 @@ bool _InputArray::empty() const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->empty();
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
return ((const cuda::GpuMat*)obj)->empty();
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
return ((const cuda::CudaMem*)obj)->empty();
if( k == CUDA_HOST_MEM )
return ((const cuda::HostMem*)obj)->empty();
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
return true;
}
bool _InputArray::isContinuous(int i) const
@@ -1970,7 +1978,7 @@ size_t _InputArray::offset(int i) const
return vv[i].offset;
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
const cuda::GpuMat * const m = ((const cuda::GpuMat*)obj);
@@ -2016,7 +2024,7 @@ size_t _InputArray::step(int i) const
return vv[i].step;
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
return ((const cuda::GpuMat*)obj)->step;
@@ -2095,7 +2103,7 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
((UMat*)obj)->create(_sz, mtype);
return;
}
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
if( k == CUDA_GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
@@ -2109,11 +2117,11 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
((ogl::Buffer*)obj)->create(_sz, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
((cuda::CudaMem*)obj)->create(_sz, mtype);
CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype);
((cuda::HostMem*)obj)->create(_sz, mtype);
return;
}
int sizes[] = {_sz.height, _sz.width};
@@ -2137,7 +2145,7 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran
((UMat*)obj)->create(_rows, _cols, mtype);
return;
}
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
if( k == CUDA_GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
@@ -2151,11 +2159,11 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran
((ogl::Buffer*)obj)->create(_rows, _cols, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
((cuda::CudaMem*)obj)->create(_rows, _cols, mtype);
CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype);
((cuda::HostMem*)obj)->create(_rows, _cols, mtype);
return;
}
int sizes[] = {_rows, _cols};
@@ -2479,15 +2487,15 @@ void _OutputArray::release() const
return;
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
((cuda::GpuMat*)obj)->release();
return;
}
if( k == CUDA_MEM )
if( k == CUDA_HOST_MEM )
{
((cuda::CudaMem*)obj)->release();
((cuda::HostMem*)obj)->release();
return;
}
@@ -2583,7 +2591,7 @@ UMat& _OutputArray::getUMatRef(int i) const
cuda::GpuMat& _OutputArray::getGpuMatRef() const
{
int k = kind();
CV_Assert( k == GPU_MAT );
CV_Assert( k == CUDA_GPU_MAT );
return *(cuda::GpuMat*)obj;
}
@@ -2594,11 +2602,11 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const
return *(ogl::Buffer*)obj;
}
cuda::CudaMem& _OutputArray::getCudaMemRef() const
cuda::HostMem& _OutputArray::getHostMemRef() const
{
int k = kind();
CV_Assert( k == CUDA_MEM );
return *(cuda::CudaMem*)obj;
CV_Assert( k == CUDA_HOST_MEM );
return *(cuda::HostMem*)obj;
}
void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const
@@ -2614,10 +2622,10 @@ void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const
}
else if( k == UMAT )
((UMat*)obj)->setTo(arr, mask);
else if( k == GPU_MAT )
else if( k == CUDA_GPU_MAT )
{
Mat value = arr.getMat();
CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::GPU_MAT) );
CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::CUDA_GPU_MAT) );
((cuda::GpuMat*)obj)->setTo(Scalar(Vec<double, 4>(value.ptr<double>())), mask);
}
else
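
Since this file only renames the kind constants (GPU_MAT becomes CUDA_GPU_MAT, CUDA_MEM becomes CUDA_HOST_MEM), existing call sites keep working; only code that spells out the enum values needs updating. An illustrative helper (an assumption, not part of the patch) built on the dispatch above:

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

// Returns a device copy/view of 'src' regardless of where it currently lives.
static cv::cuda::GpuMat toDevice(cv::InputArray src)
{
    if (src.kind() == cv::_InputArray::CUDA_GPU_MAT)
        return src.getGpuMat();   // already on the device, returned without a copy

    // For a Mat or a HostMem the host data is uploaded; per the getMat()
    // change above, a HostMem argument is read through createMatHeader().
    cv::cuda::GpuMat d;
    d.upload(src);
    return d;
}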

View File

@@ -509,7 +509,7 @@ cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_
switch (kind)
{
case _InputArray::OPENGL_BUFFER:
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
copyFrom(arr, target, autoRelease);
break;
@@ -594,7 +594,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
@@ -657,7 +657,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr) const
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
@@ -1018,7 +1018,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
@@ -1132,7 +1132,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
@@ -1184,7 +1184,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
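
The OpenGL interop paths only pick up the renamed constant. A hedged interop sketch (assumes OpenCV built with OpenGL and CUDA support and a current GL context; names are illustrative):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/opengl.hpp>

void renderFromDevice(const cv::cuda::GpuMat& d_img)
{
    // copyFrom() lands in the CUDA_GPU_MAT case above: the pixels move
    // device-to-device into the buffer object, with no host round trip.
    cv::ogl::Buffer buf;
    buf.copyFrom(d_img, cv::ogl::Buffer::PIXEL_UNPACK_BUFFER);

    // Texture2D::copyFrom() dispatches on kind() the same way.
    cv::ogl::Texture2D tex;
    tex.copyFrom(buf);
    // ... draw 'tex' with cv::ogl::render() or custom GL calls ...
}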