moved common gpu utility functionality to gpu_private.hpp

Vladislav Vinogradov
2013-04-03 17:09:31 +04:00
parent 28b1e81883
commit 204a19b431
117 changed files with 1670 additions and 1721 deletions

View File

@@ -45,10 +45,8 @@
#include <cuda_runtime.h>
#include "opencv2/core/cuda_devptrs.hpp"
#ifndef CV_PI
#define CV_PI 3.1415926535897932384626433832795
#endif
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
#ifndef CV_PI_F
#ifndef CV_PI
@@ -58,16 +56,22 @@
#endif
#endif
namespace cv { namespace gpu { namespace cuda {
static inline void checkError(cudaError_t err, const char* file, const int line, const char* func)
{
if (cudaSuccess != err)
cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
}
}}}
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define cvCudaSafeCall(expr) cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define cvCudaSafeCall(expr) cv::gpu::cuda::checkError((expr), __FILE__, __LINE__, "")
#endif
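For context, a minimal host-side sketch of how the new cvCudaSafeCall macro is meant to be used; the helper function and buffer below are illustrative and not part of this commit:

// hypothetical helper, assumes a CUDA-enabled build and that this header is included
static void uploadExample(const unsigned char* hostData, size_t bytes)
{
    void* devPtr = 0;
    // each runtime call is wrapped: on failure checkError() forwards the
    // cudaGetErrorString() text to cv::error() with cv::Error::GpuApiCallError
    cvCudaSafeCall( cudaMalloc(&devPtr, bytes) );
    cvCudaSafeCall( cudaMemcpy(devPtr, hostData, bytes, cudaMemcpyHostToDevice) );
    cvCudaSafeCall( cudaFree(devPtr) );
}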
namespace cv { namespace gpu
{
void error(const char *error_string, const char *file, const int line, const char *func);
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
{
return reinterpret_cast<size_t>(ptr) % size == 0;
@@ -79,38 +83,32 @@ namespace cv { namespace gpu
}
}}
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
namespace cv { namespace gpu
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
enum
{
return (total + grain - 1) / grain;
}
namespace cuda
{
using cv::gpu::divUp;
BORDER_REFLECT101_GPU = 0,
BORDER_REPLICATE_GPU,
BORDER_CONSTANT_GPU,
BORDER_REFLECT_GPU,
BORDER_WRAP_GPU
};
#ifdef __CUDACC__
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef signed char schar;
#if defined (_WIN32) || defined (__APPLE__)
typedef unsigned int uint;
#endif
namespace cuda
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
}
#endif // __CUDACC__
}
#endif // __CUDACC__
}}
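As a rough illustration of how divUp and cvCudaSafeCall fit together in a kernel launch, here is a hedged sketch following the same pattern as the transform dispatchers below; the kernel name and block size are hypothetical:

// illustrative .cu code, compiled with nvcc so that the __CUDACC__ branch above is active
__global__ void fillKernel(cv::gpu::PtrStepSzb dst, unsigned char value)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < dst.cols && y < dst.rows)
        dst(y, x) = value;
}

void fillCaller(cv::gpu::PtrStepSzb dst, unsigned char value, cudaStream_t stream)
{
    const dim3 block(32, 8);
    // divUp rounds the grid size up so that every row and column is covered
    const dim3 grid(cv::gpu::cuda::divUp(dst.cols, block.x), cv::gpu::cuda::divUp(dst.rows, block.y));
    fillKernel<<<grid, block, 0, stream>>>(dst, value);
    cvCudaSafeCall( cudaGetLastError() );
    if (stream == 0)
        cvCudaSafeCall( cudaDeviceSynchronize() );
}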

View File

@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
@@ -345,7 +345,7 @@ namespace cv { namespace gpu { namespace cuda
{
typedef TransformFunctorTraits<UnOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace cuda
{
typedef TransformFunctorTraits<BinOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cuda
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
} // namespace transform_detail
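The old StaticAssert<...>::check() helper is replaced by the core CV_StaticAssert macro throughout; a tiny hedged illustration of the equivalent compile-time check (the condition is illustrative):

// fails at compile time if the condition is false
CV_StaticAssert(sizeof(float) == 4, "float is expected to be 32-bit");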

View File

@@ -58,9 +58,6 @@ namespace cv
// Simple lightweight structures that encapsulates information about an image on device.
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
template <bool expr> struct StaticAssert;
template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
template<typename T> struct DevPtr
{
typedef T elem_type;

View File

@@ -0,0 +1,134 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_GPU_PRIVATE_HPP__
#define __OPENCV_CORE_GPU_PRIVATE_HPP__
#ifndef __OPENCV_BUILD
# error this is a private header which should not be used from outside of the OpenCV library
#endif
#include "cvconfig.h"
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
#ifdef HAVE_CUDA
# include <cuda.h>
# include <cuda_runtime.h>
# include <npp.h>
# include "opencv2/core/stream_accessor.hpp"
# include "opencv2/core/cuda/common.hpp"
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
# error "Insufficient Cuda Runtime library version, please update it."
# endif
# if defined(CUDA_ARCH_BIN_OR_PTX_10)
# error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
# endif
#endif
namespace cv { namespace gpu {
CV_EXPORTS cv::String getNppErrorMessage(int code);
static inline void checkNppError(int code, const char* file, const int line, const char* func)
{
if (code < 0)
cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
}
// Converts CPU border extrapolation mode into GPU internal analogue.
// Returns true if the GPU analogue exists, false otherwise.
CV_EXPORTS bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
}}
#ifndef HAVE_CUDA
static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without GPU support"); }
#else // HAVE_CUDA
static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#if defined(__GNUC__)
#define nppSafeCall(expr) cv::gpu::checkNppError(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define nppSafeCall(expr) cv::gpu::checkNppError(expr, __FILE__, __LINE__, "")
#endif
namespace cv { namespace gpu
{
template<int n> struct NPPTypeTraits;
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
class NppStreamHandler
{
public:
inline explicit NppStreamHandler(cudaStream_t newStream)
{
oldStream = nppGetStream();
nppSetStream(newStream);
}
inline ~NppStreamHandler()
{
nppSetStream(oldStream);
}
private:
cudaStream_t oldStream;
};
}}
#endif // HAVE_CUDA
#endif // __OPENCV_CORE_GPU_PRIVATE_HPP__
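A short sketch of the intended call pattern for the NPP helpers declared above; the wrapper function and the use of nppiSet_8u_C1R are only illustrative (assumes HAVE_CUDA and the GpuMat header):

// hypothetical wrapper built on gpu_private.hpp
void setToZero(cv::gpu::GpuMat& img, cudaStream_t stream)
{
    NppiSize sz;
    sz.width  = img.cols;
    sz.height = img.rows;

    // RAII: redirect NPP to this stream for the scope of the call, restore the old stream on exit
    cv::gpu::NppStreamHandler h(stream);

    // nppSafeCall forwards negative status codes to cv::error() via checkNppError()
    nppSafeCall( nppiSet_8u_C1R(0, img.ptr<Npp8u>(), static_cast<int>(img.step), sz) );

    if (stream == 0)
        cvCudaSafeCall( cudaDeviceSynchronize() );
}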

View File

@@ -454,11 +454,6 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat);
////////////////////////////////////////////////////////////////////////
// Error handling
CV_EXPORTS void error(const char* error_string, const char* file, const int line, const char* func = "");
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////

View File

@@ -43,17 +43,20 @@
#ifndef __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#define __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#include "opencv2/core/gpumat.hpp"
#include "cuda_runtime_api.h"
#include <cuda_runtime.h>
#include "opencv2/core/cvdef.h"
// This is the only header file that depends on CUDA. All other headers are independent.
// So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
// But if you want to use the GPU yourself, you may get a CUDA stream instance using the class below.
// In this case you have to install Cuda Toolkit.
namespace cv
{
namespace gpu
{
// This is the only header file that depends on CUDA. All other headers are independent.
// So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
// But if you want to use the GPU yourself, you may get a CUDA stream instance using the class below.
// In this case you have to install Cuda Toolkit.
class Stream;
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
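Since the comment above describes how user code can obtain the underlying stream, here is a minimal hedged sketch of interop with a custom kernel (myCustomKernel is assumed to exist and is not part of OpenCV):

// requires the CUDA Toolkit, as the comment explains
void launchOnOpenCVStream(cv::gpu::Stream& s, cv::gpu::PtrStepSzb img)
{
    cudaStream_t raw = cv::gpu::StreamAccessor::getStream(s);
    // enqueue user work on the same stream OpenCV uses, so ordering is preserved
    myCustomKernel<<<1, 256, 0, raw>>>(img);
    if (cudaGetLastError() != cudaSuccess)
        CV_Error(cv::Error::GpuApiCallError, "kernel launch failed");
}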

View File

@@ -124,31 +124,31 @@ namespace cv { namespace gpu { namespace cuda
void writeScalar(const uchar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
}
void writeScalar(const schar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
}
void writeScalar(const ushort* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
}
void writeScalar(const short* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
}
void writeScalar(const int* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
}
void writeScalar(const float* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
}
void writeScalar(const double* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
cvCudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
}
template<typename T>
@@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cuda
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
cvCudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace cuda
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
cvCudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
@@ -290,8 +290,8 @@ namespace cv { namespace gpu { namespace cuda
template<typename T, typename D, typename S>
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
{
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
cvCudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cvCudaSafeCall( cudaSetDoubleForDevice(&beta) );
Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
cv::gpu::cuda::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
}

View File

@@ -46,33 +46,30 @@ using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA)
#define throw_nogpu() CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
cv::gpu::Stream::Stream() { throw_nogpu(); }
cv::gpu::Stream::Stream() { throw_no_cuda(); }
cv::gpu::Stream::~Stream() {}
cv::gpu::Stream::Stream(const Stream&) { throw_nogpu(); }
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_nogpu(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return false; }
void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_nogpu(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_nogpu(); }
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_nogpu(); }
Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
cv::gpu::Stream::Stream(Impl*) { throw_nogpu(); }
void cv::gpu::Stream::create() { throw_nogpu(); }
void cv::gpu::Stream::release() { throw_nogpu(); }
cv::gpu::Stream::Stream(const Stream&) { throw_no_cuda(); }
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_no_cuda(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_no_cuda(); return false; }
void cv::gpu::Stream::waitForCompletion() { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_no_cuda(); }
Stream& cv::gpu::Stream::Null() { throw_no_cuda(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_no_cuda(); return false; }
cv::gpu::Stream::Stream(Impl*) { throw_no_cuda(); }
void cv::gpu::Stream::create() { throw_no_cuda(); }
void cv::gpu::Stream::release() { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
#include "opencv2/core/stream_accessor.hpp"
namespace cv { namespace gpu
{
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
@@ -134,14 +131,14 @@ bool cv::gpu::Stream::queryIfComplete()
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
cvCudaSafeCall(err);
return false;
}
void cv::gpu::Stream::waitForCompletion()
{
cudaStream_t stream = Impl::getStream(impl);
cudaSafeCall( cudaStreamSynchronize(stream) );
cvCudaSafeCall( cudaStreamSynchronize(stream) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
@@ -151,7 +148,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
@@ -160,7 +157,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
@@ -169,7 +166,7 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
@@ -178,7 +175,7 @@ void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
@@ -187,7 +184,7 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
@@ -204,7 +201,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
{
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
return;
}
@@ -215,7 +212,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
{
int ival = saturate_cast<uchar>(val[0]);
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
return;
}
}
@@ -302,7 +299,7 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userDat
cudaStream_t stream = Impl::getStream(impl);
cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
cvCudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
#else
(void) callback;
(void) userData;
@@ -331,7 +328,7 @@ void cv::gpu::Stream::create()
release();
cudaStream_t stream;
cudaSafeCall( cudaStreamCreate( &stream ) );
cvCudaSafeCall( cudaStreamCreate( &stream ) );
impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));
@@ -343,7 +340,7 @@ void cv::gpu::Stream::release()
{
if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
{
cudaSafeCall( cudaStreamDestroy(impl->stream) );
cvCudaSafeCall( cudaStreamDestroy(impl->stream) );
cv::fastFree(impl);
}
}

View File

@@ -45,64 +45,38 @@
using namespace cv;
using namespace cv::gpu;
#ifndef HAVE_CUDA
#define throw_nogpu CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
#else // HAVE_CUDA
namespace
{
#if defined(__GNUC__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#endif
inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (err < 0)
{
String msg = cv::format("NPP API Call Error: %d", err);
cv::gpu::error(msg.c_str(), file, line, func);
}
}
}
#endif // HAVE_CUDA
//////////////////////////////// Initialization & Info ////////////////////////
#ifndef HAVE_CUDA
int cv::gpu::getCudaEnabledDeviceCount() { return 0; }
void cv::gpu::setDevice(int) { throw_nogpu; }
int cv::gpu::getDevice() { throw_nogpu; return 0; }
void cv::gpu::setDevice(int) { throw_no_cuda(); }
int cv::gpu::getDevice() { throw_no_cuda(); return 0; }
void cv::gpu::resetDevice() { throw_nogpu; }
void cv::gpu::resetDevice() { throw_no_cuda(); }
bool cv::gpu::deviceSupports(FeatureSet) { throw_nogpu; return false; }
bool cv::gpu::deviceSupports(FeatureSet) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_nogpu; return false; }
bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_no_cuda(); return false; }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_nogpu; return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu; }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu; return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu; return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_nogpu; return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu; return false; }
void cv::gpu::DeviceInfo::query() { throw_nogpu; }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_no_cuda(); return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_no_cuda(); }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_no_cuda(); return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_no_cuda(); return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_no_cuda(); return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_no_cuda(); return false; }
void cv::gpu::DeviceInfo::query() { throw_no_cuda(); }
void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu; }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu; }
void cv::gpu::printCudaDeviceInfo(int) { throw_no_cuda(); }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_no_cuda(); }
#else // HAVE_CUDA
@@ -117,25 +91,25 @@ int cv::gpu::getCudaEnabledDeviceCount()
if (error == cudaErrorNoDevice)
return 0;
cudaSafeCall( error );
cvCudaSafeCall( error );
return count;
}
void cv::gpu::setDevice(int device)
{
cudaSafeCall( cudaSetDevice( device ) );
cvCudaSafeCall( cudaSetDevice( device ) );
}
int cv::gpu::getDevice()
{
int device;
cudaSafeCall( cudaGetDevice( &device ) );
cvCudaSafeCall( cudaGetDevice( &device ) );
return device;
}
void cv::gpu::resetDevice()
{
cudaSafeCall( cudaDeviceReset() );
cvCudaSafeCall( cudaDeviceReset() );
}
namespace
@@ -328,7 +302,7 @@ namespace
if (!props_[devID])
{
props_[devID] = new cudaDeviceProp;
cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
cvCudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
}
return props_[devID];
@@ -348,7 +322,7 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)
if (prevDeviceID != device_id_)
setDevice(device_id_);
cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
cvCudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
if (prevDeviceID != device_id_)
setDevice(prevDeviceID);
@@ -434,8 +408,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
printf("Device count: %d\n", count);
int driverVersion = 0, runtimeVersion = 0;
cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
const char *computeMode[] = {
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
@@ -449,7 +423,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
for(int dev = beg; dev < end; ++dev)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
printf("\nDevice %d: \"%s\"\n", dev, prop.name);
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
@@ -511,13 +485,13 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
int end = valid ? device+1 : count;
int driverVersion = 0, runtimeVersion = 0;
cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
for(int dev = beg; dev < end; ++dev)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
@@ -846,18 +820,18 @@ namespace
class EmptyFuncTable : public GpuFuncTable
{
public:
void copy(const Mat&, GpuMat&) const { throw_nogpu; }
void copy(const GpuMat&, Mat&) const { throw_nogpu; }
void copy(const GpuMat&, GpuMat&) const { throw_nogpu; }
void copy(const Mat&, GpuMat&) const { throw_no_cuda(); }
void copy(const GpuMat&, Mat&) const { throw_no_cuda(); }
void copy(const GpuMat&, GpuMat&) const { throw_no_cuda(); }
void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_nogpu; }
void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_no_cuda(); }
void convert(const GpuMat&, GpuMat&) const { throw_nogpu; }
void convert(const GpuMat&, GpuMat&, double, double) const { throw_nogpu; }
void convert(const GpuMat&, GpuMat&) const { throw_no_cuda(); }
void convert(const GpuMat&, GpuMat&, double, double) const { throw_no_cuda(); }
void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_nogpu; }
void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_no_cuda(); }
void mallocPitch(void**, size_t*, size_t, size_t) const { throw_nogpu; }
void mallocPitch(void**, size_t*, size_t, size_t) const { throw_no_cuda(); }
void free(void*) const {}
};
@@ -1009,7 +983,7 @@ namespace
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
@@ -1024,7 +998,7 @@ namespace
nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -1066,7 +1040,7 @@ namespace
nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
@@ -1083,7 +1057,7 @@ namespace
nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -1114,7 +1088,7 @@ namespace
nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
@@ -1131,7 +1105,7 @@ namespace
nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -1157,7 +1131,7 @@ namespace
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
cudaSafeCall( cudaDeviceSynchronize() );
cvCudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -1174,15 +1148,15 @@ namespace
public:
void copy(const Mat& src, GpuMat& dst) const
{
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
}
void copy(const GpuMat& src, Mat& dst) const
{
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
}
void copy(const GpuMat& src, GpuMat& dst) const
{
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
}
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
@@ -1327,7 +1301,7 @@ namespace
{
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
{
cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
cvCudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
return;
}
@@ -1338,7 +1312,7 @@ namespace
if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
{
int val = saturate_cast<uchar>(s[0]);
cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
cvCudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
return;
}
}
@@ -1393,7 +1367,7 @@ namespace
void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
{
cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
cvCudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
}
void free(void* devPtr) const
@@ -1551,18 +1525,117 @@ void cv::gpu::GpuMat::release()
////////////////////////////////////////////////////////////////////////
// Error handling
void cv::gpu::error(const char *error_string, const char *file, const int line, const char *func)
#ifdef HAVE_CUDA
namespace
{
int code = CV_GpuApiCallError;
#define error_entry(entry) { entry, #entry }
if (std::uncaught_exception())
struct ErrorEntry
{
const char* errorStr = cvErrorStr(code);
const char* function = func ? func : "unknown function";
int code;
const char* str;
};
fprintf(stderr, "OpenCV Error: %s(%s) in %s, file %s, line %d", errorStr, error_string, function, file, line);
fflush(stderr);
}
else
cv::error( cv::Exception(code, error_string, func, file, line) );
struct ErrorEntryComparer
{
int code;
ErrorEntryComparer(int code_) : code(code_) {}
bool operator()(const ErrorEntry& e) const { return e.code == code; }
};
const ErrorEntry npp_errors [] =
{
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ),
error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
error_entry( NPP_DOUBLE_SIZE_WARNING ),
error_entry( NPP_ODD_ROI_WARNING )
};
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
}
#endif
String cv::gpu::getNppErrorMessage(int code)
{
#ifndef HAVE_CUDA
(void) code;
return String();
#else
size_t idx = std::find_if(npp_errors, npp_errors + npp_error_num, ErrorEntryComparer(code)) - npp_errors;
const char* msg = (idx != npp_error_num) ? npp_errors[idx].str : "Unknown error code";
String str = cv::format("%s [Code = %d]", msg, code);
return str;
#endif
}
bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
{
#ifndef HAVE_CUDA
(void) cpuBorderType;
(void) gpuBorderType;
return false;
#else
switch (cpuBorderType)
{
case IPL_BORDER_REFLECT_101:
gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;
return true;
case IPL_BORDER_REPLICATE:
gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
return true;
case IPL_BORDER_CONSTANT:
gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
return true;
case IPL_BORDER_REFLECT:
gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;
return true;
case IPL_BORDER_WRAP:
gpuBorderType = cv::gpu::BORDER_WRAP_GPU;
return true;
default:
return false;
};
#endif
}
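For illustration, a hedged sketch of how a GPU code path might consume the two helpers defined above; the function name and error handling are made up for the example:

// hypothetical caller, assumes the cv::gpu declarations from gpu_private.hpp
void checkBorderSupported(int cpuBorderType)
{
    int gpuBorderType = 0;
    if (!cv::gpu::tryConvertToGpuBorderType(cpuBorderType, gpuBorderType))
        CV_Error(cv::Error::StsBadArg, "Unsupported border extrapolation mode for the GPU code path");
    // gpuBorderType now holds one of the BORDER_*_GPU constants from cuda/common.hpp
}

// getNppErrorMessage() formats a readable message for an NPP status code, e.g.
//   cv::String msg = cv::gpu::getNppErrorMessage(NPP_NULL_POINTER_ERROR);
//   // msg looks like "NPP_NULL_POINTER_ERROR [Code = ...]" (the number depends on the NPP version)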

View File

@@ -41,7 +41,6 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/gpumat.hpp"
using namespace cv;
using namespace cv::gpu;
@@ -181,30 +180,29 @@ bool cv::gpu::CudaMem::empty() const
#if !defined (HAVE_CUDA)
void cv::gpu::registerPageLocked(Mat&) { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
void cv::gpu::unregisterPageLocked(Mat&) { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
void cv::gpu::CudaMem::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/)
{ CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
bool cv::gpu::CudaMem::canMapHostMemory() { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); return false; }
void cv::gpu::CudaMem::release() { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support"); return GpuMat(); }
void cv::gpu::registerPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::CudaMem::create(int, int, int, int) { throw_no_cuda(); }
bool cv::gpu::CudaMem::canMapHostMemory() { throw_no_cuda(); return false; }
void cv::gpu::CudaMem::release() { throw_no_cuda(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return GpuMat(); }
#else /* !defined (HAVE_CUDA) */
void cv::gpu::registerPageLocked(Mat& m)
{
cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
cvCudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
}
void cv::gpu::unregisterPageLocked(Mat& m)
{
cudaSafeCall( cudaHostUnregister(m.ptr()) );
cvCudaSafeCall( cudaHostUnregister(m.ptr()) );
}
bool cv::gpu::CudaMem::canMapHostMemory()
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
return (prop.canMapHostMemory != 0) ? true : false;
}
@@ -222,7 +220,7 @@ namespace
void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
{
if (_alloc_type == ALLOC_ZEROCOPY && !canMapHostMemory())
cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
CV_Error(cv::Error::GpuApiCallError, "ZeroCopy is not supported by current device");
_type &= Mat::TYPE_MASK;
if( rows == _rows && cols == _cols && type() == _type && data )
@@ -239,7 +237,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
if (_alloc_type == ALLOC_ZEROCOPY)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
step = alignUpStep(step, prop.textureAlignment);
}
int64 _nettosize = (int64)step*rows;
@@ -254,10 +252,10 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
switch (alloc_type)
{
case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default: cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
case ALLOC_PAGE_LOCKED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
case ALLOC_WRITE_COMBINED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
}
datastart = data = (uchar*)ptr;
@@ -270,15 +268,13 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
{
CV_Assert( alloc_type == ALLOC_ZEROCOPY );
GpuMat res;
if (alloc_type == ALLOC_ZEROCOPY)
{
void *pdev;
cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
res = GpuMat(rows, cols, type(), pdev, step);
}
else
cv::gpu::error("Zero-copy is not supported or memory was allocated without zero-copy flag", __FILE__, __LINE__);
void *pdev;
cvCudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
res = GpuMat(rows, cols, type(), pdev, step);
return res;
}
@@ -287,7 +283,7 @@ void cv::gpu::CudaMem::release()
{
if( refcount && CV_XADD(refcount, -1) == 1 )
{
cudaSafeCall( cudaFreeHost(datastart ) );
cvCudaSafeCall( cudaFreeHost(datastart ) );
fastFree(refcount);
}
data = datastart = dataend = 0;

View File

@@ -41,16 +41,12 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/core/gpumat.hpp"
#ifdef HAVE_OPENGL
#include "gl_core_3_1.hpp"
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#endif
# include "gl_core_3_1.hpp"
# ifdef HAVE_CUDA
# include <cuda_gl_interop.h>
# endif
#endif
using namespace cv;
@@ -59,15 +55,9 @@ using namespace cv::gpu;
namespace
{
#ifndef HAVE_OPENGL
void throw_nogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
void throw_no_ogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
#else
void throw_nogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }
#ifndef HAVE_CUDA
void throw_nocuda() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); }
#else
void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#endif
void throw_no_ogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }
#endif
bool checkError(const char* file, const int line, const char* func = 0)
@@ -137,13 +127,13 @@ void cv::gpu::setGlDevice(int device)
{
#ifndef HAVE_OPENGL
(void) device;
throw_nogl();
throw_no_ogl();
#else
#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER)
(void) device;
throw_nocuda();
throw_no_cuda();
#else
cudaSafeCall( cudaGLSetGLDevice(device) );
cvCudaSafeCall( cudaGLSetGLDevice(device) );
#endif
#endif
}
@@ -194,7 +184,7 @@ namespace
return;
cudaGraphicsResource_t resource;
cudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
cvCudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
release();
@@ -227,7 +217,7 @@ namespace
CudaResource::GraphicsMapHolder::GraphicsMapHolder(cudaGraphicsResource_t* resource, cudaStream_t stream) : resource_(resource), stream_(stream)
{
if (resource_)
cudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
cvCudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
}
CudaResource::GraphicsMapHolder::~GraphicsMapHolder()
@@ -250,14 +240,14 @@ namespace
void* dst;
size_t size;
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
CV_DbgAssert( width * height == size );
if (stream == 0)
cudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
cvCudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
}
void CudaResource::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream)
@@ -269,14 +259,14 @@ namespace
void* src;
size_t size;
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
CV_DbgAssert( width * height == size );
if (stream == 0)
cudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
cvCudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
cvCudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
}
void* CudaResource::map(cudaStream_t stream)
@@ -287,7 +277,7 @@ namespace
void* ptr;
size_t size;
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
h.reset();
@@ -476,7 +466,7 @@ void cv::ogl::Buffer::Impl::unmapHost()
cv::ogl::Buffer::Buffer() : rows_(0), cols_(0), type_(0)
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
impl_ = Impl::empty();
#endif
@@ -490,7 +480,7 @@ cv::ogl::Buffer::Buffer(int arows, int acols, int atype, unsigned int abufId, bo
(void) atype;
(void) abufId;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
impl_ = new Impl(abufId, autoRelease);
rows_ = arows;
@@ -506,7 +496,7 @@ cv::ogl::Buffer::Buffer(Size asize, int atype, unsigned int abufId, bool autoRel
(void) atype;
(void) abufId;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
impl_ = new Impl(abufId, autoRelease);
rows_ = asize.height;
@@ -531,7 +521,7 @@ cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_
(void) arr;
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -578,7 +568,7 @@ void cv::ogl::Buffer::create(int arows, int acols, int atype, Target target, boo
(void) atype;
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
if (rows_ != arows || cols_ != acols || type_ != atype)
{
@@ -607,7 +597,7 @@ void cv::ogl::Buffer::setAutoRelease(bool flag)
{
#ifndef HAVE_OPENGL
(void) flag;
throw_nogl();
throw_no_ogl();
#else
impl_->setAutoRelease(flag);
#endif
@@ -619,7 +609,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
(void) arr;
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -647,7 +637,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
@@ -672,7 +662,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) c
(void) arr;
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -693,7 +683,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) c
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
GpuMat& dmat = arr.getGpuMatRef();
dmat.create(rows_, cols_, type_);
@@ -719,7 +709,7 @@ cv::ogl::Buffer cv::ogl::Buffer::clone(Target target, bool autoRelease) const
#ifndef HAVE_OPENGL
(void) target;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
return cv::ogl::Buffer();
#else
ogl::Buffer buf;
@@ -732,7 +722,7 @@ void cv::ogl::Buffer::bind(Target target) const
{
#ifndef HAVE_OPENGL
(void) target;
throw_nogl();
throw_no_ogl();
#else
impl_->bind(target);
#endif
@@ -742,7 +732,7 @@ void cv::ogl::Buffer::unbind(Target target)
{
#ifndef HAVE_OPENGL
(void) target;
throw_nogl();
throw_no_ogl();
#else
gl::BindBuffer(target, 0);
CV_CheckGlError();
@@ -753,7 +743,7 @@ Mat cv::ogl::Buffer::mapHost(Access access)
{
#ifndef HAVE_OPENGL
(void) access;
throw_nogl();
throw_no_ogl();
return Mat();
#else
return Mat(rows_, cols_, type_, impl_->mapHost(access));
@@ -763,7 +753,7 @@ Mat cv::ogl::Buffer::mapHost(Access access)
void cv::ogl::Buffer::unmapHost()
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
return impl_->unmapHost();
#endif
@@ -772,11 +762,11 @@ void cv::ogl::Buffer::unmapHost()
GpuMat cv::ogl::Buffer::mapDevice()
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
return GpuMat();
#else
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
return GpuMat();
#else
return GpuMat(rows_, cols_, type_, impl_->mapDevice());
@@ -787,10 +777,10 @@ GpuMat cv::ogl::Buffer::mapDevice()
void cv::ogl::Buffer::unmapDevice()
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
impl_->unmapDevice();
#endif
@@ -800,7 +790,7 @@ void cv::ogl::Buffer::unmapDevice()
unsigned int cv::ogl::Buffer::bufId() const
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
return 0;
#else
return impl_->bufId();
@@ -926,7 +916,7 @@ void cv::ogl::Texture2D::Impl::bind() const
cv::ogl::Texture2D::Texture2D() : rows_(0), cols_(0), format_(NONE)
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
impl_ = Impl::empty();
#endif
@@ -940,7 +930,7 @@ cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, unsigned int
(void) aformat;
(void) atexId;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
impl_ = new Impl(atexId, autoRelease);
rows_ = arows;
@@ -956,7 +946,7 @@ cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, unsigned int atexId, b
(void) aformat;
(void) atexId;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
impl_ = new Impl(atexId, autoRelease);
rows_ = asize.height;
@@ -980,7 +970,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
#ifndef HAVE_OPENGL
(void) arr;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -1016,7 +1006,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
@@ -1051,7 +1041,7 @@ void cv::ogl::Texture2D::create(int arows, int acols, Format aformat, bool autoR
(void) acols;
(void) aformat;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
if (rows_ != arows || cols_ != acols || format_ != aformat)
{
@@ -1080,7 +1070,7 @@ void cv::ogl::Texture2D::setAutoRelease(bool flag)
{
#ifndef HAVE_OPENGL
(void) flag;
throw_nogl();
throw_no_ogl();
#else
impl_->setAutoRelease(flag);
#endif
@@ -1091,7 +1081,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
#ifndef HAVE_OPENGL
(void) arr;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -1129,7 +1119,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
ogl::Buffer buf(dmat, ogl::Buffer::PIXEL_UNPACK_BUFFER);
@@ -1158,7 +1148,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
(void) arr;
(void) ddepth;
(void) autoRelease;
throw_nogl();
throw_no_ogl();
#else
const int kind = arr.kind();
@@ -1180,7 +1170,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
throw_nocuda();
throw_no_cuda();
#else
ogl::Buffer buf(rows_, cols_, CV_MAKE_TYPE(ddepth, cn), ogl::Buffer::PIXEL_PACK_BUFFER);
buf.bind(ogl::Buffer::PIXEL_PACK_BUFFER);
@@ -1207,7 +1197,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
void cv::ogl::Texture2D::bind() const
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
impl_->bind();
#endif
@@ -1216,7 +1206,7 @@ void cv::ogl::Texture2D::bind() const
unsigned int cv::ogl::Texture2D::texId() const
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
return 0;
#else
return impl_->texId();
@@ -1331,7 +1321,7 @@ void cv::ogl::Arrays::setAutoRelease(bool flag)
void cv::ogl::Arrays::bind() const
{
#ifndef HAVE_OPENGL
throw_nogl();
throw_no_ogl();
#else
CV_Assert( texCoord_.empty() || texCoord_.size().area() == size_ );
CV_Assert( normal_.empty() || normal_.size().area() == size_ );
@@ -1416,7 +1406,7 @@ void cv::ogl::render(const ogl::Texture2D& tex, Rect_<double> wndRect, Rect_<dou
(void) tex;
(void) wndRect;
(void) texRect;
throw_nogl();
throw_no_ogl();
#else
if (!tex.empty())
{
@@ -1488,7 +1478,7 @@ void cv::ogl::render(const ogl::Arrays& arr, int mode, Scalar color)
(void) arr;
(void) mode;
(void) color;
throw_nogl();
throw_no_ogl();
#else
if (!arr.empty())
{
@@ -1508,7 +1498,7 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala
(void) indices;
(void) mode;
(void) color;
throw_nogl();
throw_no_ogl();
#else
if (!arr.empty() && !indices.empty())
{

View File

@@ -46,8 +46,10 @@
#include "opencv2/core/utility.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/gpu_private.hpp"
#include <assert.h>
#include <ctype.h>
@@ -64,37 +66,6 @@
#define GET_OPTIMIZED(func) (func)
#endif
#ifdef HAVE_CUDA
# include <cuda_runtime.h>
# include <npp.h>
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
# define NPP_MINIMUM_REQUIRED_VERSION 4200
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
# error "Insufficient Cuda Runtime library version, please update it."
# endif
# if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
# error "Insufficient NPP version, please update it."
# endif
# if defined(__GNUC__)
# define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
# else
# define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
# endif
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
#else
# define cudaSafeCall(expr)
#endif //HAVE_CUDA
namespace cv
{