Renamed the cv::cuda::cudev namespace to cv::cuda::device

The cudev name is used for the new device layer.
2013-07-23 11:33:51 +04:00
parent e895b7455e
commit 8282f6ebc1
165 changed files with 539 additions and 539 deletions
--- a/modules/core/include/opencv2/core/cuda/block.hpp
+++ b/modules/core/include/opencv2/core/cuda/block.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_DEVICE_BLOCK_HPP__
 #define __OPENCV_GPU_DEVICE_BLOCK_HPP__
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    struct Block
    {
--- a/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
+++ b/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
@@ -47,7 +47,7 @@
 #include "vec_traits.hpp"
 #include "vec_math.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    //////////////////////////////////////////////////////////////
    // BrdConstant
--- a/modules/core/include/opencv2/core/cuda/color.hpp
+++ b/modules/core/include/opencv2/core/cuda/color.hpp
@@ -45,7 +45,7 @@
 #include "detail/color_detail.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    // All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
--- a/modules/core/include/opencv2/core/cuda/common.hpp
+++ b/modules/core/include/opencv2/core/cuda/common.hpp
@@ -87,7 +87,7 @@ namespace cv { namespace cuda
 namespace cv { namespace cuda
 {
-    namespace cudev
+    namespace device
    {
        __host__ __device__ __forceinline__ int divUp(int total, int grain)
        {
--- a/modules/core/include/opencv2/core/cuda/datamov_utils.hpp
+++ b/modules/core/include/opencv2/core/cuda/datamov_utils.hpp
@@ -45,7 +45,7 @@
 #include "common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
--- a/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp
@@ -49,7 +49,7 @@
 #include "../limits.hpp"
 #include "../functional.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    #ifndef CV_DESCALE
        #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
@@ -146,7 +146,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -219,7 +219,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(name, scn, bidx, green_bits) \
    struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -300,7 +300,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(name, dcn, bidx, green_bits) \
    struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -346,7 +346,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(name, dcn) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::Gray2RGB<T, dcn> functor_type; \
+        typedef ::cv::cuda::device::color_detail::Gray2RGB<T, dcn> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -388,7 +388,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(name, green_bits) \
    struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::Gray2RGB5x5<green_bits> functor_type; \
+        typedef ::cv::cuda::device::color_detail::Gray2RGB5x5<green_bits> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -430,7 +430,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(name, green_bits) \
    struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB5x52Gray<green_bits> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB5x52Gray<green_bits> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -481,7 +481,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(name, scn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2Gray<T, scn, bidx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2Gray<T, scn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -532,7 +532,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -620,7 +620,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -699,7 +699,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -778,7 +778,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -854,7 +854,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -929,7 +929,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1067,7 +1067,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1075,7 +1075,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <typename T> struct name ## _full_traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1083,7 +1083,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <> struct name ## _traits<float> \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1091,7 +1091,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <> struct name ## _full_traits<float> \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1207,7 +1207,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::cuda::device::color_detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1215,7 +1215,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <typename T> struct name ## _full_traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
+        typedef ::cv::cuda::device::color_detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1223,7 +1223,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <> struct name ## _traits<float> \
    { \
-        typedef ::cv::cuda::cudev::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::device::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1231,7 +1231,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <> struct name ## _full_traits<float> \
    { \
-        typedef ::cv::cuda::cudev::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::device::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1340,7 +1340,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1348,7 +1348,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <typename T> struct name ## _full_traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1356,7 +1356,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <> struct name ## _traits<float> \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1364,7 +1364,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <> struct name ## _full_traits<float> \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1480,7 +1480,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::cuda::device::color_detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1488,7 +1488,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <typename T> struct name ## _full_traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
+        typedef ::cv::cuda::device::color_detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1496,7 +1496,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <> struct name ## _traits<float> \
    { \
-        typedef ::cv::cuda::cudev::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::device::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1504,7 +1504,7 @@ namespace cv { namespace cuda { namespace cudev
    }; \
    template <> struct name ## _full_traits<float> \
    { \
-        typedef ::cv::cuda::cudev::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::device::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1649,7 +1649,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2Lab_TRAITS(name, scn, dcn, srgb, blueIdx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2Lab<T, scn, dcn, srgb, blueIdx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2Lab<T, scn, dcn, srgb, blueIdx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1762,7 +1762,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_Lab2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::Lab2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::Lab2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1861,7 +1861,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2Luv_TRAITS(name, scn, dcn, srgb, blueIdx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::RGB2Luv<T, scn, dcn, srgb, blueIdx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::RGB2Luv<T, scn, dcn, srgb, blueIdx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@@ -1962,7 +1962,7 @@ namespace cv { namespace cuda { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_Luv2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef ::cv::cuda::cudev::color_detail::Luv2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
+        typedef ::cv::cuda::device::color_detail::Luv2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
--- a/modules/core/include/opencv2/core/cuda/detail/reduce.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/reduce.hpp
@@ -47,7 +47,7 @@
 #include "../warp.hpp"
 #include "../warp_shuffle.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace reduce_detail
    {
--- a/modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp
@@ -47,7 +47,7 @@
 #include "../warp.hpp"
 #include "../warp_shuffle.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace reduce_key_val_detail
    {
--- a/modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp
@@ -47,7 +47,7 @@
 #include "../vec_traits.hpp"
 #include "../functional.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace transform_detail
    {
--- a/modules/core/include/opencv2/core/cuda/detail/type_traits_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/type_traits_detail.hpp
@@ -46,7 +46,7 @@
 #include "../common.hpp"
 #include "../vec_traits.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace type_traits_detail
    {
--- a/modules/core/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
@@ -45,7 +45,7 @@
 #include "../datamov_utils.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace vec_distance_detail
    {
--- a/modules/core/include/opencv2/core/cuda/dynamic_smem.hpp
+++ b/modules/core/include/opencv2/core/cuda/dynamic_smem.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
 #define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template<class T> struct DynamicSharedMem
    {
--- a/modules/core/include/opencv2/core/cuda/emulation.hpp
+++ b/modules/core/include/opencv2/core/cuda/emulation.hpp
@@ -46,7 +46,7 @@
 #include "common.hpp"
 #include "warp_reduce.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    struct Emulation
    {
--- a/modules/core/include/opencv2/core/cuda/filters.hpp
+++ b/modules/core/include/opencv2/core/cuda/filters.hpp
@@ -48,7 +48,7 @@
 #include "vec_math.hpp"
 #include "type_traits.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename Ptr2D> struct PointFilter
    {
--- a/modules/core/include/opencv2/core/cuda/funcattrib.hpp
+++ b/modules/core/include/opencv2/core/cuda/funcattrib.hpp
@@ -45,7 +45,7 @@
 #include <cstdio>
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template<class Func>
    void printFuncAttrib(Func& func)
--- a/modules/core/include/opencv2/core/cuda/functional.hpp
+++ b/modules/core/include/opencv2/core/cuda/functional.hpp
@@ -49,7 +49,7 @@
 #include "type_traits.hpp"
 #include "device_functions.h"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    // Function Objects
    template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
--- a/modules/core/include/opencv2/core/cuda/limits.hpp
+++ b/modules/core/include/opencv2/core/cuda/limits.hpp
@@ -47,7 +47,7 @@
 #include <float.h>
 #include "common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
 template <class T> struct numeric_limits;
--- a/modules/core/include/opencv2/core/cuda/reduce.hpp
+++ b/modules/core/include/opencv2/core/cuda/reduce.hpp
@@ -47,7 +47,7 @@
 #include "detail/reduce.hpp"
 #include "detail/reduce_key_val.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <int N, typename T, class Op>
    __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
--- a/modules/core/include/opencv2/core/cuda/saturate_cast.hpp
+++ b/modules/core/include/opencv2/core/cuda/saturate_cast.hpp
@@ -45,7 +45,7 @@
 #include "common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
--- a/modules/core/include/opencv2/core/cuda/scan.hpp
+++ b/modules/core/include/opencv2/core/cuda/scan.hpp
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/warp.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    enum ScanKind { EXCLUSIVE = 0,  INCLUSIVE = 1 };
@@ -174,13 +174,13 @@ namespace cv { namespace cuda { namespace cudev
    __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
    #if __CUDA_ARCH__ >= 300
-        const unsigned int laneId = cv::cuda::cudev::Warp::laneId();
+        const unsigned int laneId = cv::cuda::device::Warp::laneId();
        // scan on shuffl functions
        #pragma unroll
        for (int i = 1; i <= (OPENCV_GPU_WARP_SIZE / 2); i *= 2)
        {
-            const T n = cv::cuda::cudev::shfl_up(idata, i);
+            const T n = cv::cuda::device::shfl_up(idata, i);
            if (laneId >= i)
                  idata += n;
        }
--- a/modules/core/include/opencv2/core/cuda/simd_functions.hpp
+++ b/modules/core/include/opencv2/core/cuda/simd_functions.hpp
@@ -123,7 +123,7 @@
  vmin4(a,b)      per-byte unsigned minimum: min(a, b)
 */
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    // 2
--- a/modules/core/include/opencv2/core/cuda/transform.hpp
+++ b/modules/core/include/opencv2/core/cuda/transform.hpp
@@ -47,7 +47,7 @@
 #include "utility.hpp"
 #include "detail/transform_detail.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T, typename D, typename UnOp, typename Mask>
    static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
--- a/modules/core/include/opencv2/core/cuda/type_traits.hpp
+++ b/modules/core/include/opencv2/core/cuda/type_traits.hpp
@@ -45,7 +45,7 @@
 #include "detail/type_traits_detail.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct IsSimpleParameter
    {
--- a/modules/core/include/opencv2/core/cuda/utility.hpp
+++ b/modules/core/include/opencv2/core/cuda/utility.hpp
@@ -46,7 +46,7 @@
 #include "saturate_cast.hpp"
 #include "datamov_utils.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    #define OPENCV_GPU_LOG_WARP_SIZE        (5)
    #define OPENCV_GPU_WARP_SIZE            (1 << OPENCV_GPU_LOG_WARP_SIZE)
--- a/modules/core/include/opencv2/core/cuda/vec_distance.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_distance.hpp
@@ -47,7 +47,7 @@
 #include "functional.hpp"
 #include "detail/vec_distance_detail.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct L1Dist
    {
--- a/modules/core/include/opencv2/core/cuda/vec_math.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_math.hpp
@@ -46,7 +46,7 @@
 #include "vec_traits.hpp"
 #include "saturate_cast.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
 // saturate_cast
--- a/modules/core/include/opencv2/core/cuda/vec_traits.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_traits.hpp
@@ -45,7 +45,7 @@
 #include "common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template<typename T, int N> struct TypeVec;
--- a/modules/core/include/opencv2/core/cuda/warp.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_DEVICE_WARP_HPP__
 #define __OPENCV_GPU_DEVICE_WARP_HPP__
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    struct Warp
    {
--- a/modules/core/include/opencv2/core/cuda/warp_reduce.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp_reduce.hpp
@@ -43,7 +43,7 @@
 #ifndef OPENCV_GPU_WARP_REDUCE_HPP__
 #define OPENCV_GPU_WARP_REDUCE_HPP__
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <class T>
    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
--- a/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_WARP_SHUFFLE_HPP__
 #define __OPENCV_GPU_WARP_SHUFFLE_HPP__
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T>
    __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
--- a/modules/gpu/src/calib3d.cpp
+++ b/modules/gpu/src/calib3d.cpp
@@ -55,7 +55,7 @@ void cv::cuda::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Ma
 #else
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace transform_points
    {
@@ -78,7 +78,7 @@ namespace cv { namespace cuda { namespace cudev
    }
 }}}
-using namespace ::cv::cuda::cudev;
+using namespace ::cv::cuda::device;
 namespace
 {
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -372,7 +372,7 @@ struct PyrLavel
    cv::Size sWindow;
 };
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace lbp
    {
@@ -473,7 +473,7 @@ public:
                acc += level.sFrame.width + 1;
            }
-            cudev::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
+            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
                leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
        }
@@ -481,7 +481,7 @@ public:
            return 0;
        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
-        cudev::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());
+        device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());
        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        cudaSafeCall( cudaDeviceSynchronize() );
--- a/modules/gpu/src/cuda/calib3d.cu
+++ b/modules/gpu/src/cuda/calib3d.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/reduce.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
@@ -79,7 +79,7 @@ namespace cv { namespace cuda { namespace cudev
            cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
-            cv::cuda::cudev::transform(src, dst, TransformOp(), WithOutMask(), stream);
+            cv::cuda::device::transform(src, dst, TransformOp(), WithOutMask(), stream);
        }
    } // namespace transform_points
@@ -120,7 +120,7 @@ namespace cv { namespace cuda { namespace cudev
            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
-            cv::cuda::cudev::transform(src, dst, ProjectOp(), WithOutMask(), stream);
+            cv::cuda::device::transform(src, dst, ProjectOp(), WithOutMask(), stream);
        }
    } // namespace project_points
--- a/modules/gpu/src/cuda/ccomponetns.cu
+++ b/modules/gpu/src/cuda/ccomponetns.cu
@@ -50,7 +50,7 @@
 #include <iostream>
 #include <stdio.h>
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace ccl
    {
--- a/modules/gpu/src/cuda/global_motion.cu
+++ b/modules/gpu/src/cuda/global_motion.cu
@@ -47,7 +47,7 @@
 #include <thrust/functional.h>
 #include "opencv2/core/cuda/common.hpp"
-namespace cv { namespace cuda { namespace cudev { namespace globmotion {
+namespace cv { namespace cuda { namespace device { namespace globmotion {
 __constant__ float cml[9];
 __constant__ float cmr[9];
--- a/modules/gpu/src/cuda/hog.cu
+++ b/modules/gpu/src/cuda/hog.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    // Other values are not supported
    #define CELL_WIDTH 8
--- a/modules/gpu/src/cuda/lbp.cu
+++ b/modules/gpu/src/cuda/lbp.cu
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/vec_traits.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace lbp
    {
--- a/modules/gpu/src/cuda/lbp.hpp
+++ b/modules/gpu/src/cuda/lbp.hpp
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/emulation.hpp"
-namespace cv { namespace cuda { namespace cudev {
+namespace cv { namespace cuda { namespace device {
 namespace lbp {
--- a/modules/gpu/src/global_motion.cpp
+++ b/modules/gpu/src/global_motion.cpp
@@ -53,7 +53,7 @@ void cv::cuda::calcWobbleSuppressionMaps(
 #else
-namespace cv { namespace cuda { namespace cudev { namespace globmotion {
+namespace cv { namespace cuda { namespace device { namespace globmotion {
    int compactPoints(int N, float *points0, float *points1, const uchar *mask);
@@ -70,7 +70,7 @@ void cv::cuda::compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mas
    CV_Assert(points0.cols == mask.cols && points1.cols == mask.cols);
    int npoints = points0.cols;
-    int remaining = cv::cuda::cudev::globmotion::compactPoints(
+    int remaining = cv::cuda::device::globmotion::compactPoints(
            npoints, (float*)points0.data, (float*)points1.data, mask.data);
    points0 = points0.colRange(0, remaining);
@@ -88,7 +88,7 @@ void cv::cuda::calcWobbleSuppressionMaps(
    mapx.create(size, CV_32F);
    mapy.create(size, CV_32F);
-    cv::cuda::cudev::globmotion::calcWobbleSuppressionMaps(
+    cv::cuda::device::globmotion::calcWobbleSuppressionMaps(
                left, idx, right, size.width, size.height,
                ml.ptr<float>(), mr.ptr<float>(), mapx, mapy);
 }
--- a/modules/gpu/src/graphcuts.cpp
+++ b/modules/gpu/src/graphcuts.cpp
@@ -52,7 +52,7 @@ void cv::cuda::labelComponents(const GpuMat&, GpuMat&, int, Stream&) { throw_no_
 #else /* !defined (HAVE_CUDA) */
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace ccl
    {
@@ -81,12 +81,12 @@ void cv::cuda::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Sca
    static const func_t suppotLookup[8][4] =
    {   //    1,    2,     3,     4
-        { cudev::ccl::computeEdges<uchar>,  0,  cudev::ccl::computeEdges<uchar3>,  cudev::ccl::computeEdges<uchar4>  },// CV_8U
+        { device::ccl::computeEdges<uchar>,  0,  device::ccl::computeEdges<uchar3>,  device::ccl::computeEdges<uchar4>  },// CV_8U
        { 0,                                 0,  0,                                  0                                  },// CV_16U
-        { cudev::ccl::computeEdges<ushort>, 0,  cudev::ccl::computeEdges<ushort3>, cudev::ccl::computeEdges<ushort4> },// CV_8S
+        { device::ccl::computeEdges<ushort>, 0,  device::ccl::computeEdges<ushort3>, device::ccl::computeEdges<ushort4> },// CV_8S
        { 0,                                 0,  0,                                  0                                  },// CV_16S
-        { cudev::ccl::computeEdges<int>,    0,  0,                                  0                                  },// CV_32S
+        { device::ccl::computeEdges<int>,    0,  0,                                  0                                  },// CV_32S
-        { cudev::ccl::computeEdges<float>,  0,  0,                                  0                                  },// CV_32F
+        { device::ccl::computeEdges<float>,  0,  0,                                  0                                  },// CV_32F
        { 0,                                 0,  0,                                  0                                  },// CV_64F
        { 0,                                 0,  0,                                  0                                  } // CV_USRTYPE1
    };
@@ -112,7 +112,7 @@ void cv::cuda::labelComponents(const GpuMat& mask, GpuMat& components, int flags
    components.create(mask.size(), CV_32SC1);
    cudaStream_t stream = StreamAccessor::getStream(s);
-    cudev::ccl::labelComponents(mask, components, flags, stream);
+    device::ccl::labelComponents(mask, components, flags, stream);
 }
 namespace
--- a/modules/gpu/src/hog.cpp
+++ b/modules/gpu/src/hog.cpp
@@ -62,7 +62,7 @@ void cv::cuda::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, std::ve
 #else
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace hog
    {
@@ -102,7 +102,7 @@ namespace cv { namespace cuda { namespace cudev
    }
 }}}
-using namespace ::cv::cuda::cudev;
+using namespace ::cv::cuda::device;
 cv::cuda::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
                                      int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -297,7 +297,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
 #ifdef HAVE_CUFFT
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
@@ -320,7 +320,7 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst
    (void) flags;
    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);
-    static Caller callers[] = { cudev::mulSpectrums, cudev::mulSpectrums_CONJ };
+    static Caller callers[] = { device::mulSpectrums, device::mulSpectrums_CONJ };
    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();
@@ -341,7 +341,7 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst
 #ifdef HAVE_CUFFT
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
@@ -365,7 +365,7 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr
    (void)flags;
    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);
-    static Caller callers[] = { cudev::mulAndScaleSpectrums, cudev::mulAndScaleSpectrums_CONJ };
+    static Caller callers[] = { device::mulAndScaleSpectrums, device::mulAndScaleSpectrums_CONJ };
    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();
--- a/modules/gpuarithm/src/core.cpp
+++ b/modules/gpuarithm/src/core.cpp
@@ -66,7 +66,7 @@ void cv::cuda::copyMakeBorder(InputArray, OutputArray, int, int, int, int, int,
 ////////////////////////////////////////////////////////////////////////
 // merge/split
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace split_merge
    {
@@ -112,7 +112,7 @@ namespace
                src_as_devmem[i] = src[i];
            PtrStepSzb dst_as_devmem(dst);
-            cv::cuda::cudev::split_merge::merge(src_as_devmem, dst_as_devmem, (int)n, CV_ELEM_SIZE(depth), StreamAccessor::getStream(stream));
+            cv::cuda::device::split_merge::merge(src_as_devmem, dst_as_devmem, (int)n, CV_ELEM_SIZE(depth), StreamAccessor::getStream(stream));
        }
    }
@@ -145,7 +145,7 @@ namespace
            dst_as_devmem[i] = dst[i];
        PtrStepSzb src_as_devmem(src);
-        cv::cuda::cudev::split_merge::split(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), StreamAccessor::getStream(stream));
+        cv::cuda::device::split_merge::split(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), StreamAccessor::getStream(stream));
    }
 }
@@ -503,7 +503,7 @@ Ptr<LookUpTable> cv::cuda::createLookUpTable(InputArray lut)
 ////////////////////////////////////////////////////////////////////////
 // copyMakeBorder
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace imgproc
    {
@@ -515,7 +515,7 @@ namespace
 {
    template <typename T, int cn> void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
    {
-        using namespace ::cv::cuda::cudev::imgproc;
+        using namespace ::cv::cuda::device::imgproc;
        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
--- a/modules/gpuarithm/src/cuda/absdiff_mat.cu
+++ b/modules/gpuarithm/src/cuda/absdiff_mat.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -102,7 +102,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -121,18 +121,18 @@ namespace arithm
 {
    void absDiffMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream);
    }
    void absDiffMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
    }
    template <typename T>
    void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream);
    }
    template void absDiffMat<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/absdiff_scalar.cu
+++ b/modules/gpuarithm/src/cuda/absdiff_scalar.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -69,7 +69,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -83,7 +83,7 @@ namespace arithm
    {
        AbsDiffScalar<T, S> op(static_cast<S>(val));
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream);
    }
    template void absDiffScalar<uchar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/add_mat.cu
+++ b/modules/gpuarithm/src/cuda/add_mat.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -89,7 +89,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -108,21 +108,21 @@ namespace arithm
 {
    void addMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
    }
    void addMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
    }
    template <typename T, typename D>
    void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
    }
    template void addMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/add_scalar.cu
+++ b/modules/gpuarithm/src/cuda/add_scalar.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -68,7 +68,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
@@ -83,9 +83,9 @@ namespace arithm
        AddScalar<T, S, D> op(static_cast<S>(val));
        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }
    template void addScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/add_weighted.cu
+++ b/modules/gpuarithm/src/cuda/add_weighted.cu
@@ -50,7 +50,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -100,7 +100,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> >
    {
@@ -121,7 +121,7 @@ namespace arithm
    {
        AddWeighted<T1, T2, D> op(alpha, beta, gamma);
-        cudev::transform((PtrStepSz<T1>) src1, (PtrStepSz<T2>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+        device::transform((PtrStepSz<T1>) src1, (PtrStepSz<T2>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }
    template void addWeighted<uchar, uchar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/bitwise_mat.cu
+++ b/modules/gpuarithm/src/cuda/bitwise_mat.cu
@@ -51,9 +51,9 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -77,33 +77,33 @@ namespace arithm
    template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream);
+            device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream);
        else
-            cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
    }
    template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
    }
    template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
    }
    template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream);
        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
    }
    template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/bitwise_scalar.cu
+++ b/modules/gpuarithm/src/cuda/bitwise_scalar.cu
@@ -51,9 +51,9 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -72,17 +72,17 @@ namespace arithm
 {
    template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
    }
    template <typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
    }
    template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
    }
    template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/cmp_mat.cu
+++ b/modules/gpuarithm/src/cuda/cmp_mat.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -107,7 +107,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -131,26 +131,26 @@ namespace arithm
 {
    void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
    }
    void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
    }
    void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
    }
    void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
    }
    template <template <typename> class Op, typename T>
    void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
        Cmp<Op<T>, T> op;
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
    }
    template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
--- a/modules/gpuarithm/src/cuda/cmp_scalar.cu
+++ b/modules/gpuarithm/src/cuda/cmp_scalar.cu
@@ -52,7 +52,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -125,7 +125,7 @@ namespace arithm
 #undef TYPE_VEC
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
    {
@@ -144,7 +144,7 @@ namespace arithm
        src_t val1 = VecTraits<src_t>::make(sval);
        CmpScalar<Op<T>, T, cn> op(val1);
-        cudev::transform((PtrStepSz<src_t>) src, (PtrStepSz<dst_t>) dst, op, WithOutMask(), stream);
+        device::transform((PtrStepSz<src_t>) src, (PtrStepSz<dst_t>) dst, op, WithOutMask(), stream);
    }
    template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
--- a/modules/gpuarithm/src/cuda/copy_make_border.cu
+++ b/modules/gpuarithm/src/cuda/copy_make_border.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace imgproc
    {
--- a/modules/gpuarithm/src/cuda/countnonzero.cu
+++ b/modules/gpuarithm/src/cuda/countnonzero.cu
@@ -50,7 +50,7 @@
 #include "opencv2/core/cuda/emulation.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace countNonZero
 {
@@ -80,7 +80,7 @@ namespace countNonZero
            }
        }
-        cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
+        device::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
    #if __CUDA_ARCH__ >= 200
        if (tid == 0)
@@ -105,7 +105,7 @@ namespace countNonZero
        {
            mycount = tid < gridDim.x * gridDim.y ? count[tid] : 0;
-            cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
+            device::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
            if (tid == 0)
            {
--- a/modules/gpuarithm/src/cuda/div_mat.cu
+++ b/modules/gpuarithm/src/cuda/div_mat.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -128,7 +128,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -147,12 +147,12 @@ namespace arithm
 {
    void divMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
    }
    void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
    }
    template <typename T, typename S, typename D>
@@ -161,12 +161,12 @@ namespace arithm
        if (scale == 1)
        {
            Div<T, D> op;
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
        else
        {
            DivScale<T, S, D> op(static_cast<S>(scale));
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
    }
--- a/modules/gpuarithm/src/cuda/div_scalar.cu
+++ b/modules/gpuarithm/src/cuda/div_scalar.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -80,7 +80,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
@@ -99,12 +99,12 @@ namespace arithm
        if (inv)
        {
            DivScalarInv<T, S, D> op(static_cast<S>(val));
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
        else
        {
            DivScalar<T, S, D> op(static_cast<S>(val));
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
    }
--- a/modules/gpuarithm/src/cuda/integral.cu
+++ b/modules/gpuarithm/src/cuda/integral.cu
@@ -44,7 +44,7 @@
 #include "opencv2/core/cuda/common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace imgproc
    {
--- a/modules/gpuarithm/src/cuda/math.cu
+++ b/modules/gpuarithm/src/cuda/math.cu
@@ -53,12 +53,12 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 //////////////////////////////////////////////////////////////////////////
 // absMat
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -70,7 +70,7 @@ namespace arithm
    template <typename T>
    void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream);
    }
    template void absMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -99,7 +99,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -111,7 +111,7 @@ namespace arithm
    template <typename T>
    void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream);
    }
    template void sqrMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -126,7 +126,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////
 // sqrtMat
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -138,7 +138,7 @@ namespace arithm
    template <typename T>
    void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream);
    }
    template void sqrtMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -153,7 +153,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////
 // logMat
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -165,7 +165,7 @@ namespace arithm
    template <typename T>
    void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream);
    }
    template void logMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -195,7 +195,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -207,7 +207,7 @@ namespace arithm
    template <typename T>
    void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream);
    }
    template void expMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -275,7 +275,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -287,7 +287,7 @@ namespace arithm
    template<typename T>
    void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, PowOp<T>(power), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, PowOp<T>(power), WithOutMask(), stream);
    }
    template void pow<uchar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/minmax.cu
+++ b/modules/gpuarithm/src/cuda/minmax.cu
@@ -52,7 +52,7 @@
 #include "opencv2/core/cuda/utility.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace minMax
 {
@@ -105,7 +105,7 @@ namespace minMax
                const minimum<R> minOp;
                const maximum<R> maxOp;
-                cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
+                device::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
                if (tid == 0)
                {
@@ -153,7 +153,7 @@ namespace minMax
            }
        }
-        cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
+        device::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
        GlobalReduce<BLOCK_SIZE, R>::run(mymin, mymax, minval, maxval, tid, bid, sminval, smaxval);
    }
--- a/modules/gpuarithm/src/cuda/minmax_mat.cu
+++ b/modules/gpuarithm/src/cuda/minmax_mat.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 //////////////////////////////////////////////////////////////////////////
 // min
@@ -81,7 +81,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <> struct TransformFunctorTraits< arithm::VMin4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -104,17 +104,17 @@ namespace arithm
 {
    void minMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VMin4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VMin4(), WithOutMask(), stream);
    }
    void minMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VMin2(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VMin2(), WithOutMask(), stream);
    }
    template <typename T> void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, minimum<T>(), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, minimum<T>(), WithOutMask(), stream);
    }
    template void minMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -127,7 +127,7 @@ namespace arithm
    template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
    }
    template void minScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
@@ -167,7 +167,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <> struct TransformFunctorTraits< arithm::VMax4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -190,17 +190,17 @@ namespace arithm
 {
    void maxMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VMax4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VMax4(), WithOutMask(), stream);
    }
    void maxMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VMax2(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VMax2(), WithOutMask(), stream);
    }
    template <typename T> void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, maximum<T>(), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, maximum<T>(), WithOutMask(), stream);
    }
    template void maxMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -213,7 +213,7 @@ namespace arithm
    template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
    }
    template void maxScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/minmaxloc.cu
+++ b/modules/gpuarithm/src/cuda/minmaxloc.cu
@@ -52,7 +52,7 @@
 #include "opencv2/core/cuda/utility.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace minMaxLoc
 {
--- a/modules/gpuarithm/src/cuda/mul_mat.cu
+++ b/modules/gpuarithm/src/cuda/mul_mat.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -109,7 +109,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -128,12 +128,12 @@ namespace arithm
 {
    void mulMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream);
    }
    void mulMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream);
    }
    template <typename T, typename S, typename D>
@@ -142,12 +142,12 @@ namespace arithm
        if (scale == 1)
        {
            Mul<T, D> op;
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
        else
        {
            MulScale<T, S, D> op(static_cast<S>(scale));
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
    }
--- a/modules/gpuarithm/src/cuda/mul_scalar.cu
+++ b/modules/gpuarithm/src/cuda/mul_scalar.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -68,7 +68,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
@@ -81,7 +81,7 @@ namespace arithm
    void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
    {
        MulScalar<T, S, D> op(static_cast<S>(val));
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+        device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }
    template void mulScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/mul_spectrums.cu
+++ b/modules/gpuarithm/src/cuda/mul_spectrums.cu
@@ -50,7 +50,7 @@
 #include "opencv2/core/cuda/common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    //////////////////////////////////////////////////////////////////////////
    // mulSpectrums
--- a/modules/gpuarithm/src/cuda/polar_cart.cu
+++ b/modules/gpuarithm/src/cuda/polar_cart.cu
@@ -44,7 +44,7 @@
 #include "opencv2/core/cuda/common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace mathfunc
    {
--- a/modules/gpuarithm/src/cuda/reduce.cu
+++ b/modules/gpuarithm/src/cuda/reduce.cu
@@ -53,7 +53,7 @@
 #include "unroll_detail.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace reduce
 {
@@ -191,7 +191,7 @@ namespace reduce
        volatile S* srow = smem + threadIdx.y * 16;
        myVal = srow[threadIdx.x];
-        cudev::reduce<16>(srow, myVal, threadIdx.x, op);
+        device::reduce<16>(srow, myVal, threadIdx.x, op);
        if (threadIdx.x == 0)
            srow[0] = myVal;
@@ -275,7 +275,7 @@ namespace reduce
        for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE)
            myVal = op(myVal, saturate_cast<work_type>(srcRow[x]));
-        cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));
+        device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));
        if (threadIdx.x == 0)
            dst[y] = saturate_cast<dst_type>(op.result(myVal, src.cols));
--- a/modules/gpuarithm/src/cuda/split_merge.cu
+++ b/modules/gpuarithm/src/cuda/split_merge.cu
@@ -44,7 +44,7 @@
 #include "opencv2/core/cuda/common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace split_merge
    {
--- a/modules/gpuarithm/src/cuda/sub_mat.cu
+++ b/modules/gpuarithm/src/cuda/sub_mat.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -89,7 +89,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -108,21 +108,21 @@ namespace arithm
 {
    void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
    }
    void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cudev::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
+        device::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
    }
    template <typename T, typename D>
    void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
    }
    template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/sub_scalar.cu
+++ b/modules/gpuarithm/src/cuda/sub_scalar.cu
@@ -51,7 +51,7 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
@@ -69,7 +69,7 @@ namespace arithm
    };
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::SubScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
@@ -84,9 +84,9 @@ namespace arithm
        SubScalar<T, S, D> op(static_cast<S>(val), inv ? -1 : 1);
        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }
    template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
--- a/modules/gpuarithm/src/cuda/sum.cu
+++ b/modules/gpuarithm/src/cuda/sum.cu
@@ -53,7 +53,7 @@
 #include "unroll_detail.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace sum
 {
@@ -130,7 +130,7 @@ namespace sum
            {
                sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
-                cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
+                device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
                if (tid == 0)
                {
@@ -173,7 +173,7 @@ namespace sum
            }
        }
-        cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
+        device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
        GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem);
    }
--- a/modules/gpuarithm/src/cuda/threshold.cu
+++ b/modules/gpuarithm/src/cuda/threshold.cu
@@ -51,9 +51,9 @@
 #include "arithm_func_traits.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -82,7 +82,7 @@ namespace arithm
    void threshold_caller(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream)
    {
        Op<T> op(thresh, maxVal);
-        cudev::transform(src, dst, op, WithOutMask(), stream);
+        device::transform(src, dst, op, WithOutMask(), stream);
    }
    template <typename T>
--- a/modules/gpuarithm/src/cuda/transpose.cu
+++ b/modules/gpuarithm/src/cuda/transpose.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace arithm
 {
--- a/modules/gpuarithm/src/cuda/unroll_detail.hpp
+++ b/modules/gpuarithm/src/cuda/unroll_detail.hpp
@@ -75,11 +75,11 @@ namespace detail
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*> smem_tuple(R* smem)
        {
-            return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
+            return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE);
        }
        template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::device::VecTraits<R>::elem_type&, typename cv::cuda::device::VecTraits<R>::elem_type&> tie(R& val)
        {
            return thrust::tie(val.x, val.y);
        }
@@ -95,11 +95,11 @@ namespace detail
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
        {
-            return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
+            return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
        }
        template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::device::VecTraits<R>::elem_type&, typename cv::cuda::device::VecTraits<R>::elem_type&, typename cv::cuda::device::VecTraits<R>::elem_type&> tie(R& val)
        {
            return thrust::tie(val.x, val.y, val.z);
        }
@@ -115,11 +115,11 @@ namespace detail
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
        {
-            return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
+            return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
        }
        template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::device::VecTraits<R>::elem_type&, typename cv::cuda::device::VecTraits<R>::elem_type&, typename cv::cuda::device::VecTraits<R>::elem_type&, typename cv::cuda::device::VecTraits<R>::elem_type&> tie(R& val)
        {
            return thrust::tie(val.x, val.y, val.z, val.w);
        }
--- a/modules/gpuarithm/src/element_operations.cpp
+++ b/modules/gpuarithm/src/element_operations.cpp
@@ -3041,7 +3041,7 @@ void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // Polar <-> Cart
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace mathfunc
    {
@@ -3054,7 +3054,7 @@ namespace
 {
    void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
    {
-        using namespace ::cv::cuda::cudev::mathfunc;
+        using namespace ::cv::cuda::device::mathfunc;
        CV_Assert(x.size() == y.size() && x.type() == y.type());
        CV_Assert(x.depth() == CV_32F);
@@ -3069,7 +3069,7 @@ namespace
    void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
    {
-        using namespace ::cv::cuda::cudev::mathfunc;
+        using namespace ::cv::cuda::device::mathfunc;
        CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
        CV_Assert(mag.depth() == CV_32F);
--- a/modules/gpuarithm/src/reductions.cpp
+++ b/modules/gpuarithm/src/reductions.cpp
@@ -751,7 +751,7 @@ void cv::cuda::normalize(InputArray _src, OutputArray dst, double a, double b, i
 ////////////////////////////////////////////////////////////////////////
 // integral
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace imgproc
    {
@@ -776,7 +776,7 @@ void cv::cuda::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Strea
    {
        ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
-        cv::cuda::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
+        cv::cuda::device::imgproc::shfl_integral_gpu(src, buffer, stream);
        _dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
        GpuMat dst = _dst.getGpuMat();
--- a/modules/gpubgsegm/src/cuda/fgd.cu
+++ b/modules/gpubgsegm/src/cuda/fgd.cu
@@ -51,7 +51,7 @@
 #include "fgd.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace fgd
 {
--- a/modules/gpubgsegm/src/cuda/gmg.cu
+++ b/modules/gpubgsegm/src/cuda/gmg.cu
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/vec_traits.hpp"
 #include "opencv2/core/cuda/limits.hpp"
-namespace cv { namespace cuda { namespace cudev {
+namespace cv { namespace cuda { namespace device {
    namespace gmg
    {
        __constant__ int   c_width;
--- a/modules/gpubgsegm/src/cuda/mog.cu
+++ b/modules/gpubgsegm/src/cuda/mog.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/limits.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace mog
    {
--- a/modules/gpubgsegm/src/cuda/mog2.cu
+++ b/modules/gpubgsegm/src/cuda/mog2.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/limits.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace mog2
    {
--- a/modules/gpubgsegm/src/gmg.cpp
+++ b/modules/gpubgsegm/src/gmg.cpp
@@ -51,7 +51,7 @@ Ptr<cuda::BackgroundSubtractorGMG> cv::cuda::createBackgroundSubtractorGMG(int,
 #else
-namespace cv { namespace cuda { namespace cudev {
+namespace cv { namespace cuda { namespace device {
    namespace gmg
    {
        void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior,
@@ -167,7 +167,7 @@ namespace
    void GMGImpl::apply(InputArray _frame, OutputArray _fgmask, double newLearningRate, Stream& stream)
    {
-        using namespace cv::cuda::cudev::gmg;
+        using namespace cv::cuda::device::gmg;
        typedef void (*func_t)(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
                               int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
@@ -237,7 +237,7 @@ namespace
    void GMGImpl::initialize(Size frameSize, float min, float max)
    {
-        using namespace cv::cuda::cudev::gmg;
+        using namespace cv::cuda::device::gmg;
        CV_Assert( maxFeatures_ > 0 );
        CV_Assert( learningRate_ >= 0.0f && learningRate_ <= 1.0f);
--- a/modules/gpubgsegm/src/mog.cpp
+++ b/modules/gpubgsegm/src/mog.cpp
@@ -51,7 +51,7 @@ Ptr<cuda::BackgroundSubtractorMOG> cv::cuda::createBackgroundSubtractorMOG(int,
 #else
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace mog
    {
@@ -133,7 +133,7 @@ namespace
    void MOGImpl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream)
    {
-        using namespace cv::cuda::cudev::mog;
+        using namespace cv::cuda::device::mog;
        GpuMat frame = _frame.getGpuMat();
@@ -164,7 +164,7 @@ namespace
    void MOGImpl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const
    {
-        using namespace cv::cuda::cudev::mog;
+        using namespace cv::cuda::device::mog;
        _backgroundImage.create(frameSize_, frameType_);
        GpuMat backgroundImage = _backgroundImage.getGpuMat();
--- a/modules/gpubgsegm/src/mog2.cpp
+++ b/modules/gpubgsegm/src/mog2.cpp
@@ -51,7 +51,7 @@ Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int
 #else
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace mog2
    {
@@ -178,7 +178,7 @@ namespace
    void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream)
    {
-        using namespace cv::cuda::cudev::mog2;
+        using namespace cv::cuda::device::mog2;
        GpuMat frame = _frame.getGpuMat();
@@ -208,7 +208,7 @@ namespace
    void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const
    {
-        using namespace cv::cuda::cudev::mog2;
+        using namespace cv::cuda::device::mog2;
        _backgroundImage.create(frameSize_, frameType_);
        GpuMat backgroundImage = _backgroundImage.getGpuMat();
@@ -218,7 +218,7 @@ namespace
    void MOG2Impl::initialize(cv::Size frameSize, int frameType)
    {
-        using namespace cv::cuda::cudev::mog2;
+        using namespace cv::cuda::device::mog2;
        CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 );
--- a/modules/gpucodec/src/cuda/nv12_to_rgb.cu
+++ b/modules/gpucodec/src/cuda/nv12_to_rgb.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/common.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f};
--- a/modules/gpucodec/src/cuda/rgb_to_yv12.cu
+++ b/modules/gpucodec/src/cuda/rgb_to_yv12.cu
@@ -43,7 +43,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/vec_traits.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    __device__ __forceinline__ void rgb_to_y(const uchar b, const uchar g, const uchar r, uchar& y)
    {
--- a/modules/gpucodec/src/video_reader.cpp
+++ b/modules/gpucodec/src/video_reader.cpp
@@ -53,7 +53,7 @@ Ptr<VideoReader> cv::gpucodec::createVideoReader(const Ptr<RawVideoSource>&) { t
 #else // HAVE_NVCUVID
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream = 0);
 }}}
@@ -125,7 +125,7 @@ namespace
    void cudaPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height)
    {
-        using namespace cv::cuda::cudev;
+        using namespace cv::cuda::device;
        // Final Stage: NV12toARGB color space conversion
--- a/modules/gpucodec/src/video_writer.cpp
+++ b/modules/gpucodec/src/video_writer.cpp
@@ -62,7 +62,7 @@ Ptr<VideoWriter> cv::gpucodec::createVideoWriter(const Ptr<EncoderCallBack>&, Si
 #else // !defined HAVE_CUDA || !defined WIN32
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream = 0);
 }}}
@@ -642,7 +642,7 @@ namespace
        if (inputFormat_ == SF_BGR)
        {
-            cudev::RGB_to_YV12(frame, frame.channels(), videoFrame_);
+            device::RGB_to_YV12(frame, frame.channels(), videoFrame_);
        }
        else
        {
--- a/modules/gpufeatures2d/src/brute_force_matcher.cpp
+++ b/modules/gpufeatures2d/src/brute_force_matcher.cpp
@@ -81,7 +81,7 @@ void cv::cuda::BFMatcher_GPU::radiusMatch(const GpuMat&, std::vector< std::vecto
 #else /* !defined (HAVE_CUDA) */
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace bf_match
    {
@@ -197,7 +197,7 @@ void cv::cuda::BFMatcher_GPU::matchSingle(const GpuMat& query, const GpuMat& tra
    if (query.empty() || train.empty())
        return;
-    using namespace cv::cuda::cudev::bf_match;
+    using namespace cv::cuda::device::bf_match;
    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
@@ -340,7 +340,7 @@ void cv::cuda::BFMatcher_GPU::matchCollection(const GpuMat& query, const GpuMat&
    if (query.empty() || trainCollection.empty())
        return;
-    using namespace cv::cuda::cudev::bf_match;
+    using namespace cv::cuda::device::bf_match;
    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                             const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
@@ -451,7 +451,7 @@ void cv::cuda::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat&
    if (query.empty() || train.empty())
        return;
-    using namespace cv::cuda::cudev::bf_knnmatch;
+    using namespace cv::cuda::device::bf_knnmatch;
    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                             const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
@@ -577,7 +577,7 @@ void cv::cuda::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const Gpu
    if (query.empty() || trainCollection.empty())
        return;
-    using namespace cv::cuda::cudev::bf_knnmatch;
+    using namespace cv::cuda::device::bf_knnmatch;
    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                             const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
@@ -755,7 +755,7 @@ void cv::cuda::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMa
    if (query.empty() || train.empty())
        return;
-    using namespace cv::cuda::cudev::bf_radius_match;
+    using namespace cv::cuda::device::bf_radius_match;
    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
@@ -881,7 +881,7 @@ void cv::cuda::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat&
    if (query.empty() || empty())
        return;
-    using namespace cv::cuda::cudev::bf_radius_match;
+    using namespace cv::cuda::device::bf_radius_match;
    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                             const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
--- a/modules/gpufeatures2d/src/cuda/bf_knnmatch.cu
+++ b/modules/gpufeatures2d/src/cuda/bf_knnmatch.cu
@@ -50,7 +50,7 @@
 #include "opencv2/core/cuda/datamov_utils.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace bf_knnmatch
    {
--- a/modules/gpufeatures2d/src/cuda/bf_match.cu
+++ b/modules/gpufeatures2d/src/cuda/bf_match.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/vec_distance.hpp"
 #include "opencv2/core/cuda/datamov_utils.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace bf_match
    {
--- a/modules/gpufeatures2d/src/cuda/bf_radius_match.cu
+++ b/modules/gpufeatures2d/src/cuda/bf_radius_match.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/vec_distance.hpp"
 #include "opencv2/core/cuda/datamov_utils.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace bf_radius_match
    {
--- a/modules/gpufeatures2d/src/cuda/fast.cu
+++ b/modules/gpufeatures2d/src/cuda/fast.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/utility.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace fast
    {
--- a/modules/gpufeatures2d/src/cuda/orb.cu
+++ b/modules/gpufeatures2d/src/cuda/orb.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/functional.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace orb
    {
--- a/modules/gpufeatures2d/src/fast.cpp
+++ b/modules/gpufeatures2d/src/fast.cpp
@@ -108,7 +108,7 @@ void cv::cuda::FAST_GPU::operator ()(const GpuMat& img, const GpuMat& mask, GpuM
    keypoints.cols = getKeyPoints(keypoints);
 }
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace fast
    {
@@ -119,7 +119,7 @@ namespace cv { namespace cuda { namespace cudev
 int cv::cuda::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
 {
-    using namespace cv::cuda::cudev::fast;
+    using namespace cv::cuda::device::fast;
    CV_Assert(img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
@@ -142,7 +142,7 @@ int cv::cuda::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& m
 int cv::cuda::FAST_GPU::getKeyPoints(GpuMat& keypoints)
 {
-    using namespace cv::cuda::cudev::fast;
+    using namespace cv::cuda::device::fast;
    if (count_ == 0)
        return 0;
--- a/modules/gpufeatures2d/src/orb.cpp
+++ b/modules/gpufeatures2d/src/orb.cpp
@@ -62,7 +62,7 @@ void cv::cuda::ORB_GPU::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }
 #else /* !defined (HAVE_CUDA) */
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace orb
    {
@@ -431,7 +431,7 @@ cv::cuda::ORB_GPU::ORB_GPU(int nFeatures, float scaleFactor, int nLevels, int ed
        ++v_0;
    }
    CV_Assert(u_max.size() < 32);
-    cv::cuda::cudev::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
+    cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
    // Calc pattern
    const int npoints = 512;
@@ -543,7 +543,7 @@ namespace
    //takes keypoints and culls them by the response
    void cull(GpuMat& keypoints, int& count, int n_points)
    {
-        using namespace cv::cuda::cudev::orb;
+        using namespace cv::cuda::device::orb;
        //this is only necessary if the keypoints size is greater than the number of desired points.
        if (count > n_points)
@@ -561,7 +561,7 @@ namespace
 void cv::cuda::ORB_GPU::computeKeyPointsPyramid()
 {
-    using namespace cv::cuda::cudev::orb;
+    using namespace cv::cuda::device::orb;
    int half_patch_size = patchSize_ / 2;
@@ -604,7 +604,7 @@ void cv::cuda::ORB_GPU::computeKeyPointsPyramid()
 void cv::cuda::ORB_GPU::computeDescriptors(GpuMat& descriptors)
 {
-    using namespace cv::cuda::cudev::orb;
+    using namespace cv::cuda::device::orb;
    int nAllkeypoints = 0;
@@ -644,7 +644,7 @@ void cv::cuda::ORB_GPU::computeDescriptors(GpuMat& descriptors)
 void cv::cuda::ORB_GPU::mergeKeyPoints(GpuMat& keypoints)
 {
-    using namespace cv::cuda::cudev::orb;
+    using namespace cv::cuda::device::orb;
    int nAllkeypoints = 0;
--- a/modules/gpufilters/src/cuda/column_filter.hpp
+++ b/modules/gpufilters/src/cuda/column_filter.hpp
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/border_interpolate.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace column_filter
 {
--- a/modules/gpufilters/src/cuda/filter2d.cu
+++ b/modules/gpufilters/src/cuda/filter2d.cu
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <class SrcPtr, typename D>
    __global__ void filter2D(const SrcPtr src, PtrStepSz<D> dst,
--- a/modules/gpufilters/src/cuda/row_filter.hpp
+++ b/modules/gpufilters/src/cuda/row_filter.hpp
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/border_interpolate.hpp"
 using namespace cv::cuda;
-using namespace cv::cuda::cudev;
+using namespace cv::cuda::device;
 namespace row_filter
 {
--- a/modules/gpufilters/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
@@ -175,7 +175,7 @@ Ptr<Filter> cv::cuda::createBoxFilter(int srcType, int dstType, Size ksize, Poin
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Linear Filter
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    template <typename T, typename D>
    void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
@@ -230,22 +230,22 @@ namespace
        switch (srcType)
        {
        case CV_8UC1:
-            func_ = cv::cuda::cudev::filter2D<uchar, uchar>;
+            func_ = cv::cuda::device::filter2D<uchar, uchar>;
            break;
        case CV_8UC4:
-            func_ = cv::cuda::cudev::filter2D<uchar4, uchar4>;
+            func_ = cv::cuda::device::filter2D<uchar4, uchar4>;
            break;
        case CV_16UC1:
-            func_ = cv::cuda::cudev::filter2D<ushort, ushort>;
+            func_ = cv::cuda::device::filter2D<ushort, ushort>;
            break;
        case CV_16UC4:
-            func_ = cv::cuda::cudev::filter2D<ushort4, ushort4>;
+            func_ = cv::cuda::device::filter2D<ushort4, ushort4>;
            break;
        case CV_32FC1:
-            func_ = cv::cuda::cudev::filter2D<float, float>;
+            func_ = cv::cuda::device::filter2D<float, float>;
            break;
        case CV_32FC4:
-            func_ = cv::cuda::cudev::filter2D<float4, float4>;
+            func_ = cv::cuda::device::filter2D<float4, float4>;
            break;
        }
    }
--- a/modules/gpuimgproc/src/bilateral_filter.cpp
+++ b/modules/gpuimgproc/src/bilateral_filter.cpp
@@ -51,7 +51,7 @@ void cv::cuda::bilateralFilter(InputArray, OutputArray, int, float, float, int,
 #else
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace imgproc
    {
@@ -62,7 +62,7 @@ namespace cv { namespace cuda { namespace cudev
 void cv::cuda::bilateralFilter(InputArray _src, OutputArray _dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& stream)
 {
-    using cv::cuda::cudev::imgproc::bilateral_filter_gpu;
+    using cv::cuda::device::imgproc::bilateral_filter_gpu;
    typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s);
--- a/modules/gpuimgproc/src/blend.cpp
+++ b/modules/gpuimgproc/src/blend.cpp
@@ -54,7 +54,7 @@ void cv::cuda::blendLinear(InputArray, InputArray, InputArray, InputArray, Outpu
 ////////////////////////////////////////////////////////////////////////
 // blendLinear
-namespace cv { namespace cuda { namespace cudev
+namespace cv { namespace cuda { namespace device
 {
    namespace blend
    {
@@ -65,7 +65,7 @@ namespace cv { namespace cuda { namespace cudev
    }
 }}}
-using namespace ::cv::cuda::cudev::blend;
+using namespace ::cv::cuda::device::blend;
 void cv::cuda::blendLinear(InputArray _img1, InputArray _img2, InputArray _weights1, InputArray _weights2,
                          OutputArray _result, Stream& stream)
--- a/Show More
+++ b/Show More