moved common gpu utility functionality to gpu_private.hpp

2013-04-03 17:09:31 +04:00
parent 28b1e81883
commit 204a19b431
117 changed files with 1670 additions and 1721 deletions
--- a/modules/core/include/opencv2/core/cuda/common.hpp
+++ b/modules/core/include/opencv2/core/cuda/common.hpp
@@ -45,10 +45,8 @@

 #include <cuda_runtime.h>
 #include "opencv2/core/cuda_devptrs.hpp"
-
-#ifndef CV_PI
-    #define CV_PI   3.1415926535897932384626433832795
-#endif
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"

 #ifndef CV_PI_F
    #ifndef CV_PI
@@ -58,16 +56,22 @@
    #endif
 #endif

+namespace cv { namespace gpu { namespace cuda {
+    static inline void checkError(cudaError_t err, const char* file, const int line, const char* func)
+    {
+        if (cudaSuccess != err)
+            cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
+    }
+}}}
+
 #if defined(__GNUC__)
-    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
+    #define cvCudaSafeCall(expr)  cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, __func__)
 #else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
+    #define cvCudaSafeCall(expr)  cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, "")
 #endif

 namespace cv { namespace gpu
 {
-    void error(const char *error_string, const char *file, const int line, const char *func);
-
    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        return reinterpret_cast<size_t>(ptr) % size == 0;
@@ -79,38 +83,32 @@ namespace cv { namespace gpu
    }
 }}

-static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
-{
-    if (cudaSuccess != err)
-        cv::gpu::error(cudaGetErrorString(err), file, line, func);
-}
-
 namespace cv { namespace gpu
 {
-    __host__ __device__ __forceinline__ int divUp(int total, int grain)
+    enum
    {
-        return (total + grain - 1) / grain;
-    }
-
-    namespace cuda
-    {
-        using cv::gpu::divUp;
+        BORDER_REFLECT101_GPU = 0,
+        BORDER_REPLICATE_GPU,
+        BORDER_CONSTANT_GPU,
+        BORDER_REFLECT_GPU,
+        BORDER_WRAP_GPU
+    };

 #ifdef __CUDACC__
-        typedef unsigned char uchar;
-        typedef unsigned short ushort;
-        typedef signed char schar;
-        #if defined (_WIN32) || defined (__APPLE__)
-            typedef unsigned int uint;
-        #endif
+    namespace cuda
+    {
+        __host__ __device__ __forceinline__ int divUp(int total, int grain)
+        {
+            return (total + grain - 1) / grain;
+        }

        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
        {
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+            cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
        }
-#endif // __CUDACC__
    }
+#endif // __CUDACC__
 }}


--- a/modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp
@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cuda
                const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

                transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-                cudaSafeCall( cudaGetLastError() );
+                cvCudaSafeCall( cudaGetLastError() );

                if (stream == 0)
-                    cudaSafeCall( cudaDeviceSynchronize() );
+                    cvCudaSafeCall( cudaDeviceSynchronize() );
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cuda
                const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);

                transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-                cudaSafeCall( cudaGetLastError() );
+                cvCudaSafeCall( cudaGetLastError() );

                if (stream == 0)
-                    cudaSafeCall( cudaDeviceSynchronize() );
+                    cvCudaSafeCall( cudaDeviceSynchronize() );
            }
        };
        template<> struct TransformDispatcher<true>
@@ -345,7 +345,7 @@ namespace cv { namespace gpu { namespace cuda
            {
                typedef TransformFunctorTraits<UnOp> ft;

-                StaticAssert<ft::smart_shift != 1>::check();
+                CV_StaticAssert(ft::smart_shift != 1, "");

                if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
                    !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cuda
                const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);

                transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-                cudaSafeCall( cudaGetLastError() );
+                cvCudaSafeCall( cudaGetLastError() );

                if (stream == 0)
-                    cudaSafeCall( cudaDeviceSynchronize() );
+                    cvCudaSafeCall( cudaDeviceSynchronize() );
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace cuda
            {
                typedef TransformFunctorTraits<BinOp> ft;

-                StaticAssert<ft::smart_shift != 1>::check();
+                CV_StaticAssert(ft::smart_shift != 1, "");

                if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
                    !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cuda
                const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);

                transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-                cudaSafeCall( cudaGetLastError() );
+                cvCudaSafeCall( cudaGetLastError() );

                if (stream == 0)
-                    cudaSafeCall( cudaDeviceSynchronize() );
+                    cvCudaSafeCall( cudaDeviceSynchronize() );
            }
        };
    } // namespace transform_detail
--- a/modules/core/include/opencv2/core/cuda_devptrs.hpp
+++ b/modules/core/include/opencv2/core/cuda_devptrs.hpp
@@ -58,9 +58,6 @@ namespace cv
        // Simple lightweight structures that encapsulates information about an image on device.
        // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile

-        template <bool expr> struct StaticAssert;
-        template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
-
        template<typename T> struct DevPtr
        {
            typedef T elem_type;
--- a/modules/core/include/opencv2/core/gpu_private.hpp
+++ b/modules/core/include/opencv2/core/gpu_private.hpp
@@ -0,0 +1,134 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_GPU_PRIVATE_HPP__
+#define __OPENCV_CORE_GPU_PRIVATE_HPP__
+
+#ifndef __OPENCV_BUILD
+#  error this is a private header which should not be used from outside of the OpenCV library
+#endif
+
+#include "cvconfig.h"
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"
+
+#ifdef HAVE_CUDA
+#  include <cuda.h>
+#  include <cuda_runtime.h>
+#  include <npp.h>
+#  include "opencv2/core/stream_accessor.hpp"
+#  include "opencv2/core/cuda/common.hpp"
+
+#  define CUDART_MINIMUM_REQUIRED_VERSION 4020
+
+#  if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
+#    error "Insufficient Cuda Runtime library version, please update it."
+#  endif
+
+#  if defined(CUDA_ARCH_BIN_OR_PTX_10)
+#    error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
+#  endif
+#endif
+
+namespace cv { namespace gpu {
+    CV_EXPORTS cv::String getNppErrorMessage(int code);
+
+    static inline void checkNppError(int code, const char* file, const int line, const char* func)
+    {
+        if (code < 0)
+            cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
+    }
+
+    // Converts CPU border extrapolation mode into GPU internal analogue.
+    // Returns true if the GPU analogue exists, false otherwise.
+    CV_EXPORTS bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
+}}
+
+#ifndef HAVE_CUDA
+
+static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without GPU support"); }
+
+#else // HAVE_CUDA
+
+static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
+
+#if defined(__GNUC__)
+    #define nppSafeCall(expr)  cv::gpu::checkNppError(expr, __FILE__, __LINE__, __func__)
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+    #define nppSafeCall(expr)  cv::gpu::checkNppError(expr, __FILE__, __LINE__, "")
+#endif
+
+namespace cv { namespace gpu
+{
+    template<int n> struct NPPTypeTraits;
+    template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u npp_type; };
+    template<> struct NPPTypeTraits<CV_8S>  { typedef Npp8s npp_type; };
+    template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
+    template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
+    template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
+    template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
+    template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
+
+    class NppStreamHandler
+    {
+    public:
+        inline explicit NppStreamHandler(cudaStream_t newStream)
+        {
+            oldStream = nppGetStream();
+            nppSetStream(newStream);
+        }
+
+        inline ~NppStreamHandler()
+        {
+            nppSetStream(oldStream);
+        }
+
+    private:
+        cudaStream_t oldStream;
+    };
+}}
+
+#endif // HAVE_CUDA
+
+#endif // __OPENCV_CORE_GPU_PRIVATE_HPP__
--- a/modules/core/include/opencv2/core/gpumat.hpp
+++ b/modules/core/include/opencv2/core/gpumat.hpp
@@ -454,11 +454,6 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);

 CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat);

-////////////////////////////////////////////////////////////////////////
-// Error handling
-
-CV_EXPORTS void error(const char* error_string, const char* file, const int line, const char* func = "");
-
 ////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////
--- a/modules/core/include/opencv2/core/stream_accessor.hpp
+++ b/modules/core/include/opencv2/core/stream_accessor.hpp
@@ -43,17 +43,20 @@
 #ifndef __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
 #define __OPENCV_CUDA_STREAM_ACCESSOR_HPP__

-#include "opencv2/core/gpumat.hpp"
-#include "cuda_runtime_api.h"
+#include <cuda_runtime.h>
+#include "opencv2/core/cvdef.h"
+
+// This is the only header file that depends on CUDA. All other headers are independent.
+// So if you only use the OpenCV binaries, you do not need to install the CUDA Toolkit.
+// But if you want to use the GPU yourself, you can get the CUDA stream instance using the class below.
+// In this case you have to install the CUDA Toolkit.

 namespace cv
 {
    namespace gpu
    {
-        // This is only header file that depends on Cuda. All other headers are independent.
-        // So if you use OpenCV binaries you do noot need to install Cuda Toolkit.
-        // But of you wanna use GPU by yourself, may get cuda stream instance using the class below.
-        // In this case you have to install Cuda Toolkit.
+        class Stream;
+
        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);