Merge pull request #3531 from jet47:cuda-core-refactoring

Committed by Vadim Pisarevsky on 2014-12-26 12:12:42 +00:00
30 changed files with 487 additions and 441 deletions

View File

@@ -275,12 +275,12 @@ void cv::cuda::createContinuous(int rows, int cols, int type, OutputArray arr)
::createContinuousImpl(rows, cols, type, arr.getMatRef());
break;
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
::createContinuousImpl(rows, cols, type, arr.getGpuMatRef());
break;
case _InputArray::CUDA_MEM:
::createContinuousImpl(rows, cols, type, arr.getCudaMemRef());
case _InputArray::CUDA_HOST_MEM:
::createContinuousImpl(rows, cols, type, arr.getHostMemRef());
break;
default:
@@ -329,12 +329,12 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
::ensureSizeIsEnoughImpl(rows, cols, type, arr.getMatRef());
break;
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
::ensureSizeIsEnoughImpl(rows, cols, type, arr.getGpuMatRef());
break;
case _InputArray::CUDA_MEM:
::ensureSizeIsEnoughImpl(rows, cols, type, arr.getCudaMemRef());
case _InputArray::CUDA_HOST_MEM:
::ensureSizeIsEnoughImpl(rows, cols, type, arr.getHostMemRef());
break;
default:
@@ -342,14 +342,6 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
}
}
GpuMat cv::cuda::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
{
if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
return mat(Rect(0, 0, cols, rows));
return mat = GpuMat(rows, cols, type);
}
#ifndef HAVE_CUDA
GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
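
For reference, a minimal usage sketch of the two helpers this file touches (not part of the patch; it assumes a CUDA-enabled build, and the buffer sizes are arbitrary):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

int main()
{
    // createContinuous guarantees a GpuMat with no padding between rows,
    // which some CUDA kernels and interop paths require.
    cv::cuda::GpuMat d_buf;
    cv::cuda::createContinuous(480, 640, CV_8UC1, d_buf);

    // ensureSizeIsEnough only reallocates when the existing buffer is too
    // small; after this patch a pinned HostMem output is dispatched through
    // the CUDA_HOST_MEM kind and getHostMemRef(), but the call site is unchanged.
    cv::cuda::HostMem h_buf;
    cv::cuda::ensureSizeIsEnough(480, 640, CV_8UC1, h_buf);
    cv::cuda::ensureSizeIsEnough(240, 320, CV_8UC1, h_buf); // reuses the allocation
    return 0;
}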

View File

@@ -42,10 +42,124 @@
//M*/
#include "precomp.hpp"
#include <map>
using namespace cv;
using namespace cv::cuda;
#ifdef HAVE_CUDA
namespace {
class HostMemAllocator : public MatAllocator
{
public:
explicit HostMemAllocator(unsigned int flags) : flags_(flags)
{
}
UMatData* allocate(int dims, const int* sizes, int type,
void* data0, size_t* step,
int /*flags*/, UMatUsageFlags /*usageFlags*/) const
{
size_t total = CV_ELEM_SIZE(type);
for (int i = dims-1; i >= 0; i--)
{
if (step)
{
if (data0 && step[i] != CV_AUTOSTEP)
{
CV_Assert(total <= step[i]);
total = step[i];
}
else
{
step[i] = total;
}
}
total *= sizes[i];
}
UMatData* u = new UMatData(this);
u->size = total;
if (data0)
{
u->data = u->origdata = static_cast<uchar*>(data0);
u->flags |= UMatData::USER_ALLOCATED;
}
else
{
void* ptr = 0;
cudaSafeCall( cudaHostAlloc(&ptr, total, flags_) );
u->data = u->origdata = static_cast<uchar*>(ptr);
}
return u;
}
bool allocate(UMatData* u, int /*accessFlags*/, UMatUsageFlags /*usageFlags*/) const
{
return (u != NULL);
}
void deallocate(UMatData* u) const
{
CV_Assert(u->urefcount >= 0);
CV_Assert(u->refcount >= 0);
if (u && u->refcount == 0)
{
if ( !(u->flags & UMatData::USER_ALLOCATED) )
{
cudaFreeHost(u->origdata);
u->origdata = 0;
}
delete u;
}
}
private:
unsigned int flags_;
};
} // namespace
#endif
MatAllocator* cv::cuda::HostMem::getAllocator(AllocType alloc_type)
{
#ifndef HAVE_CUDA
(void) alloc_type;
throw_no_cuda();
return NULL;
#else
static std::map<unsigned int, Ptr<MatAllocator> > allocators;
unsigned int flag = cudaHostAllocDefault;
switch (alloc_type)
{
case PAGE_LOCKED: flag = cudaHostAllocDefault; break;
case SHARED: flag = cudaHostAllocMapped; break;
case WRITE_COMBINED: flag = cudaHostAllocWriteCombined; break;
default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
}
Ptr<MatAllocator>& a = allocators[flag];
if (a.empty())
{
a = makePtr<HostMemAllocator>(flag);
}
return a.get();
#endif
}
#ifdef HAVE_CUDA
namespace
{
@@ -59,7 +173,7 @@ namespace
}
#endif
void cv::cuda::CudaMem::create(int rows_, int cols_, int type_)
void cv::cuda::HostMem::create(int rows_, int cols_, int type_)
{
#ifndef HAVE_CUDA
(void) rows_;
@@ -123,9 +237,9 @@ void cv::cuda::CudaMem::create(int rows_, int cols_, int type_)
#endif
}
CudaMem cv::cuda::CudaMem::reshape(int new_cn, int new_rows) const
HostMem cv::cuda::HostMem::reshape(int new_cn, int new_rows) const
{
CudaMem hdr = *this;
HostMem hdr = *this;
int cn = channels();
if (new_cn == 0)
@@ -166,7 +280,7 @@ CudaMem cv::cuda::CudaMem::reshape(int new_cn, int new_rows) const
return hdr;
}
void cv::cuda::CudaMem::release()
void cv::cuda::HostMem::release()
{
#ifdef HAVE_CUDA
if (refcount && CV_XADD(refcount, -1) == 1)
@@ -181,7 +295,7 @@ void cv::cuda::CudaMem::release()
#endif
}
GpuMat cv::cuda::CudaMem::createGpuMatHeader() const
GpuMat cv::cuda::HostMem::createGpuMatHeader() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
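
The new HostMem::getAllocator() above makes pinned host memory available to plain cv::Mat. A hedged sketch of the intended usage (assumes a CUDA-enabled build; with HAVE_CUDA undefined the call throws):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

int main()
{
    // Route subsequent cv::Mat allocations through page-locked host memory,
    // so GpuMat::upload()/download() can use faster DMA transfers.
    cv::Mat::setDefaultAllocator(
        cv::cuda::HostMem::getAllocator(cv::cuda::HostMem::PAGE_LOCKED));

    cv::Mat frame(480, 640, CV_8UC3);   // backed by cudaHostAlloc'd memory
    cv::cuda::GpuMat d_frame;
    d_frame.upload(frame);              // copy from pinned memory to the device
    return 0;
}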

View File

@@ -1187,18 +1187,18 @@ Mat _InputArray::getMat(int i) const
return Mat();
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for cuda::GpuMat object");
return Mat();
}
if( k == CUDA_MEM )
if( k == CUDA_HOST_MEM )
{
CV_Assert( i < 0 );
const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj;
return cuda_mem->createMatHeader();
}
@@ -1391,15 +1391,15 @@ cuda::GpuMat _InputArray::getGpuMat() const
{
int k = kind();
if (k == GPU_MAT)
if (k == CUDA_GPU_MAT)
{
const cuda::GpuMat* d_mat = (const cuda::GpuMat*)obj;
return *d_mat;
}
if (k == CUDA_MEM)
if (k == CUDA_HOST_MEM)
{
const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj;
return cuda_mem->createGpuMatHeader();
}
@@ -1412,7 +1412,7 @@ cuda::GpuMat _InputArray::getGpuMat() const
if (k == NONE)
return cuda::GpuMat();
CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::CudaMem");
CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::HostMem");
return cuda::GpuMat();
}
@@ -1520,20 +1520,22 @@ Size _InputArray::size(int i) const
return buf->size();
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
const cuda::GpuMat* d_mat = (const cuda::GpuMat*)obj;
return d_mat->size();
}
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
if( k == CUDA_HOST_MEM )
{
CV_Assert( i < 0 );
const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
const cuda::HostMem* cuda_mem = (const cuda::HostMem*)obj;
return cuda_mem->size();
}
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
return Size();
}
int _InputArray::sizend(int* arrsz, int i) const
@@ -1700,18 +1702,20 @@ int _InputArray::dims(int i) const
return 2;
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
return 2;
}
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
if( k == CUDA_HOST_MEM )
{
CV_Assert( i < 0 );
return 2;
}
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
return 0;
}
size_t _InputArray::total(int i) const
@@ -1799,12 +1803,14 @@ int _InputArray::type(int i) const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->type();
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
return ((const cuda::GpuMat*)obj)->type();
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
return ((const cuda::CudaMem*)obj)->type();
if( k == CUDA_HOST_MEM )
return ((const cuda::HostMem*)obj)->type();
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
return 0;
}
int _InputArray::depth(int i) const
@@ -1863,12 +1869,14 @@ bool _InputArray::empty() const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->empty();
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
return ((const cuda::GpuMat*)obj)->empty();
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
return ((const cuda::CudaMem*)obj)->empty();
if( k == CUDA_HOST_MEM )
return ((const cuda::HostMem*)obj)->empty();
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
return true;
}
bool _InputArray::isContinuous(int i) const
@@ -1970,7 +1978,7 @@ size_t _InputArray::offset(int i) const
return vv[i].offset;
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
const cuda::GpuMat * const m = ((const cuda::GpuMat*)obj);
@@ -2016,7 +2024,7 @@ size_t _InputArray::step(int i) const
return vv[i].step;
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
CV_Assert( i < 0 );
return ((const cuda::GpuMat*)obj)->step;
@@ -2095,7 +2103,7 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
((UMat*)obj)->create(_sz, mtype);
return;
}
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
if( k == CUDA_GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
@@ -2109,11 +2117,11 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
((ogl::Buffer*)obj)->create(_sz, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
((cuda::CudaMem*)obj)->create(_sz, mtype);
CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype);
((cuda::HostMem*)obj)->create(_sz, mtype);
return;
}
int sizes[] = {_sz.height, _sz.width};
@@ -2137,7 +2145,7 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran
((UMat*)obj)->create(_rows, _cols, mtype);
return;
}
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
if( k == CUDA_GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
@@ -2151,11 +2159,11 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran
((ogl::Buffer*)obj)->create(_rows, _cols, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
((cuda::CudaMem*)obj)->create(_rows, _cols, mtype);
CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype);
((cuda::HostMem*)obj)->create(_rows, _cols, mtype);
return;
}
int sizes[] = {_rows, _cols};
@@ -2479,15 +2487,15 @@ void _OutputArray::release() const
return;
}
if( k == GPU_MAT )
if( k == CUDA_GPU_MAT )
{
((cuda::GpuMat*)obj)->release();
return;
}
if( k == CUDA_MEM )
if( k == CUDA_HOST_MEM )
{
((cuda::CudaMem*)obj)->release();
((cuda::HostMem*)obj)->release();
return;
}
@@ -2583,7 +2591,7 @@ UMat& _OutputArray::getUMatRef(int i) const
cuda::GpuMat& _OutputArray::getGpuMatRef() const
{
int k = kind();
CV_Assert( k == GPU_MAT );
CV_Assert( k == CUDA_GPU_MAT );
return *(cuda::GpuMat*)obj;
}
@@ -2594,11 +2602,11 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const
return *(ogl::Buffer*)obj;
}
cuda::CudaMem& _OutputArray::getCudaMemRef() const
cuda::HostMem& _OutputArray::getHostMemRef() const
{
int k = kind();
CV_Assert( k == CUDA_MEM );
return *(cuda::CudaMem*)obj;
CV_Assert( k == CUDA_HOST_MEM );
return *(cuda::HostMem*)obj;
}
void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const
@@ -2614,10 +2622,10 @@ void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const
}
else if( k == UMAT )
((UMat*)obj)->setTo(arr, mask);
else if( k == GPU_MAT )
else if( k == CUDA_GPU_MAT )
{
Mat value = arr.getMat();
CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::GPU_MAT) );
CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::CUDA_GPU_MAT) );
((cuda::GpuMat*)obj)->setTo(Scalar(Vec<double, 4>(value.ptr<double>())), mask);
}
else
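
Since this file only renames the kind constants (GPU_MAT becomes CUDA_GPU_MAT, CUDA_MEM becomes CUDA_HOST_MEM), existing call sites keep working; only code that spells out the enum values needs updating. An illustrative helper (an assumption, not part of the patch) built on the dispatch above:

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

// Returns a device copy/view of 'src' regardless of where it currently lives.
static cv::cuda::GpuMat toDevice(cv::InputArray src)
{
    if (src.kind() == cv::_InputArray::CUDA_GPU_MAT)
        return src.getGpuMat();   // already on the device, returned without a copy

    // For a Mat or a HostMem the host data is uploaded; per the getMat()
    // change above, a HostMem argument is read through createMatHeader().
    cv::cuda::GpuMat d;
    d.upload(src);
    return d;
}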

View File

@@ -509,7 +509,7 @@ cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_
switch (kind)
{
case _InputArray::OPENGL_BUFFER:
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
copyFrom(arr, target, autoRelease);
break;
@@ -594,7 +594,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
@@ -657,7 +657,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr) const
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
@@ -1018,7 +1018,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
@@ -1132,7 +1132,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
@@ -1184,7 +1184,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
break;
}
case _InputArray::GPU_MAT:
case _InputArray::CUDA_GPU_MAT:
{
#ifndef HAVE_CUDA
throw_no_cuda();
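
The OpenGL interop paths only pick up the renamed constant. A hedged interop sketch (assumes OpenCV built with OpenGL and CUDA support and a current GL context; names are illustrative):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/opengl.hpp>

void renderFromDevice(const cv::cuda::GpuMat& d_img)
{
    // copyFrom() lands in the CUDA_GPU_MAT case above: the pixels move
    // device-to-device into the buffer object, with no host round trip.
    cv::ogl::Buffer buf;
    buf.copyFrom(d_img, cv::ogl::Buffer::PIXEL_UNPACK_BUFFER);

    // Texture2D::copyFrom() dispatches on kind() the same way.
    cv::ogl::Texture2D tex;
    tex.copyFrom(buf);
    // ... draw 'tex' with cv::ogl::render() or custom GL calls ...
}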