removed BEGIN_OPENCV_DEVICE_NAMESPACE macros

2011-11-14 09:02:06 +00:00 · 2011-11-14 09:02:06 +00:00 · 0f53f2993e
commit 0f53f2993e
parent d926541311
73 changed files with 19272 additions and 19504 deletions
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@ -425,21 +425,20 @@ void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // Polar <-> Cart

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace mathfunc 
    {
        void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
        void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
    inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
+        using namespace ::cv::gpu::device::mathfunc;

        CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
        CV_Assert(x.depth() == CV_32F);
@ -459,7 +458,7 @@ namespace

    inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
+        using namespace ::cv::gpu::device::mathfunc;

        CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
        CV_Assert(mag.depth() == CV_32F);
--- a/modules/gpu/src/bilateral_filter.cpp
+++ b/modules/gpu/src/bilateral_filter.cpp
@ -55,8 +55,8 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&,

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace bilateral_filter
    {
        void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);
@ -64,10 +64,9 @@ namespace bilateral_filter
        void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
        void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ bilateral_filter;
+using namespace ::cv::gpu::device::bilateral_filter;

 namespace
 {
--- a/modules/gpu/src/blend.cpp
+++ b/modules/gpu/src/blend.cpp
@ -52,8 +52,8 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu

 #else

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace blend
    {
        template <typename T>
@ -61,10 +61,9 @@ namespace blend

        void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ blend;
+using namespace ::cv::gpu::device::blend;

 void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, 
                          GpuMat& result, Stream& stream)
--- a/modules/gpu/src/brute_force_matcher.cpp
+++ b/modules/gpu/src/brute_force_matcher.cpp
@ -82,8 +82,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace bf_match
    {
        template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, 
@ -154,8 +154,7 @@ namespace bf_radius_match
            const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, 
            int cc, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 ////////////////////////////////////////////////////////////////////
 // Train collection
@ -199,7 +198,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
    if (query.empty() || train.empty())
        return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
+    using namespace ::cv::gpu::device::bf_match;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, 
                             const DevMem2Di& trainIdx, const DevMem2Df& distance,
@ -341,7 +340,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
    if (query.empty() || trainCollection.empty())
        return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
+    using namespace ::cv::gpu::device::bf_match;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, 
                             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, 
@ -452,7 +451,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
    if (query.empty() || train.empty())
        return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
+    using namespace ::cv::gpu::device::bf_knnmatch;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, 
                             const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, 
@ -581,7 +580,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
    if (query.empty() || trainCollection.empty())
        return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
+    using namespace ::cv::gpu::device::bf_knnmatch;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, 
                             const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, 
@ -762,7 +761,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
    if (query.empty() || train.empty())
        return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
+    using namespace ::cv::gpu::device::bf_radius_match;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, 
                             const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, 
@ -893,7 +892,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
    if (query.empty() || empty())
        return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
+    using namespace ::cv::gpu::device::bf_radius_match;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, 
                             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, 
--- a/modules/gpu/src/calib3d.cpp
+++ b/modules/gpu/src/calib3d.cpp
@ -56,8 +56,8 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat

 #else

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace transform_points 
    {
        void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
@ -77,10 +77,9 @@ namespace solve_pnp_ransac
                const float3* transl_vectors, const float3* object, const float2* image,
                const float dist_threshold, int* hypothesis_scores);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE;
+using namespace ::cv::gpu::device;

 namespace
 {
--- a/modules/gpu/src/color.cpp
+++ b/modules/gpu/src/color.cpp
@ -51,8 +51,8 @@ void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
 #define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
    void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);

@ -199,10 +199,9 @@ OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(hls4_to_bgra)
    #undef OPENCV_GPU_DECLARE_CVTCOLOR_ONE
    #undef OPENCV_GPU_DECLARE_CVTCOLOR_ALL
    #undef OPENCV_GPU_DECLARE_CVTCOLOR_8U32F
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE;
+using namespace ::cv::gpu::device;

 namespace
 {
--- a/modules/gpu/src/cuda/bf_knnmatch.cu
+++ b/modules/gpu/src/cuda/bf_knnmatch.cu
@ -45,10 +45,10 @@
 #include "opencv2/gpu/device/vec_distance.hpp"
 #include "opencv2/gpu/device/datamov_utils.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace bf_knnmatch {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace bf_knnmatch 
+    {
        ///////////////////////////////////////////////////////////////////////////////
        // Reduction

@ -1155,7 +1155,5 @@ template void match2Hamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db&
        template void match2Hamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
        //template void match2Hamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
        template void match2Hamming_gpu<int   >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
-
    } // namespace bf_knnmatch
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/bf_match.cu
+++ b/modules/gpu/src/cuda/bf_match.cu
@ -45,10 +45,10 @@
 #include "opencv2/gpu/device/vec_distance.hpp"
 #include "opencv2/gpu/device/datamov_utils.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace bf_match {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace bf_match 
+    {
        ///////////////////////////////////////////////////////////////////////////////
        // Reduction

@ -774,7 +774,5 @@ template void matchHamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db&
        template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
-
    } // namespace bf_match
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/bf_radius_match.cu
+++ b/modules/gpu/src/cuda/bf_radius_match.cu
@ -45,10 +45,10 @@
 #include "opencv2/gpu/device/vec_distance.hpp"
 #include "opencv2/gpu/device/datamov_utils.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace bf_radius_match {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace bf_radius_match 
+    {
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled

@ -461,7 +461,5 @@ template void matchHamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db*
        template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
-
    } // namespace bf_radius_match
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/bilateral_filter.cu
+++ b/modules/gpu/src/cuda/bilateral_filter.cu
@ -43,10 +43,10 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/limits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace bilateral_filter {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace bilateral_filter 
+    {
        __constant__ float* ctable_color;
        __constant__ float* ctable_space;
        __constant__ size_t ctable_space_step;
@ -222,7 +222,5 @@ void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, in
        {
            bilateral_filter_caller(disp, img, channels, iters, stream);
        }
-
    } // namespace bilateral_filter
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/blend.cu
+++ b/modules/gpu/src/cuda/blend.cu
@ -42,10 +42,10 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace blend {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace blend 
+    {
        template <typename T>
        __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
                                          const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
@ -112,7 +112,5 @@ void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, Ptr
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
-
    } // namespace blend 
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/calib3d.cu
+++ b/modules/gpu/src/cuda/calib3d.cu
@ -44,8 +44,8 @@
 #include "opencv2/gpu/device/transform.hpp"
 #include "opencv2/gpu/device/functional.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200

    namespace transform_points
@ -74,7 +74,7 @@ namespace transform_points
            cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
-        OPENCV_DEVICE_NAMESPACE_ transform(src, dst, TransformOp(), stream);
+            ::cv::gpu::device::transform(src, dst, TransformOp(), stream);
        }
    } // namespace transform_points

@ -113,7 +113,7 @@ namespace project_points
            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
-        OPENCV_DEVICE_NAMESPACE_ transform(src, dst, ProjectOp(), stream);
+            ::cv::gpu::device::transform(src, dst, ProjectOp(), stream);
        }
    } // namespace project_points

@ -188,5 +188,4 @@ namespace solve_pnp_ransac
            cudaSafeCall( cudaDeviceSynchronize() );
        }
    } // namespace solvepnp_ransac
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/canny.cu
+++ b/modules/gpu/src/cuda/canny.cu
@ -44,10 +44,10 @@
 #include <algorithm>
 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace canny {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace canny 
+    {
        __global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
        {
            __shared__ int smem[16][18];
@ -487,7 +487,5 @@ void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)

            cudaSafeCall(cudaThreadSynchronize());
        }
-
    } // namespace canny
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/color.cu
+++ b/modules/gpu/src/cuda/color.cu
@ -44,177 +44,177 @@
 #include "opencv2/gpu/device/transform.hpp"
 #include "opencv2/gpu/device/color.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
+namespace cv { namespace gpu { namespace device 
+{
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_x = 8 };
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
    {
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
    {
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };    

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
-DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
@ -226,7 +226,7 @@ DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
        traits::functor_type functor = traits::create_functor(); \
        typedef typename traits::functor_type::argument_type src_t; \
        typedef typename traits::functor_type::result_type   dst_t; \
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream); \
+        ::cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream); \
    }

 #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
@ -376,5 +376,4 @@ OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgra)
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/column_filter.cu
+++ b/modules/gpu/src/cuda/column_filter.cu
@ -47,16 +47,16 @@
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    #define MAX_KERNEL_SIZE 16
    #define BLOCK_DIM_X 16
    #define BLOCK_DIM_Y 4
    #define RESULT_STEPS 8
    #define HALO_STEPS 1

-namespace column_filter {
-
+    namespace column_filter 
+    {
        __constant__ float c_kernel[MAX_KERNEL_SIZE];

        void loadKernel(const float kernel[], int ksize)
@ -243,7 +243,5 @@ template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2Db& src, const
        template void linearColumnFilter_gpu<float3, short3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
        template void linearColumnFilter_gpu<float , int   >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
        template void linearColumnFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
-
    } // namespace column_filter
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/copy_make_border.cu
+++ b/modules/gpu/src/cuda/copy_make_border.cu
@ -43,10 +43,10 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace imgproc 
+    {
        template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
@ -123,7 +123,5 @@ template void copyMakeBorder_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db
        //template void copyMakeBorder_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
-
    } // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@ -47,8 +47,8 @@
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    //////////////////////////////////////////////////////////////////////////
    // add

@ -84,9 +84,9 @@ template <> struct TransformFunctorTraits< Add<float, float> > : DefaultTransfor
    template <typename T, typename D> void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
    {
        if (mask.data)
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Add<T, D>(), stream);
+            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Add<T, D>(), stream);
        else
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), stream);
+            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), stream);
    }

    template void add_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
@ -181,9 +181,9 @@ template <typename T, typename D> void add_gpu(const DevMem2Db& src1, double val
        cudaSafeCall( cudaSetDoubleForDevice(&val) );
        AddScalar<T, D> op(val);
        if (mask.data)
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
+            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
        else
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
+            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
    }

    template void add_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
@ -277,9 +277,9 @@ template <> struct TransformFunctorTraits< Subtract<float, float> > : DefaultTra
    template <typename T, typename D> void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
    {
        if (mask.data)
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Subtract<T, D>(), stream);
+            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Subtract<T, D>(), stream);
        else
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), stream);
+            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), stream);
    }

    template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
@ -374,9 +374,9 @@ template <typename T, typename D> void subtract_gpu(const DevMem2Db& src1, doubl
        cudaSafeCall( cudaSetDoubleForDevice(&val) );
        SubtractScalar<T, D> op(val);
        if (mask.data)
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
+            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
        else
-        OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
+            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
    }

    template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
@ -453,7 +453,7 @@ struct multiply_8uc4_32f : binary_function<uint, float, uint>
        }
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(multiply_8uc4_32f)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(multiply_8uc4_32f)
    {
        enum { smart_block_dim_x = 8 };
        enum { smart_block_dim_y = 8 };
@ -462,7 +462,7 @@ DEFINE_TRANSFORM_FUNCTOR_TRAITS(multiply_8uc4_32f)

    void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream)
    {
-    transform(static_cast< DevMem2D_<uint> >(src1), src2, static_cast< DevMem2D_<uint> >(dst), multiply_8uc4_32f(), stream);
+        ::cv::gpu::device::transform(static_cast< DevMem2D_<uint> >(src1), src2, static_cast< DevMem2D_<uint> >(dst), multiply_8uc4_32f(), stream);
    }

    struct multiply_16sc4_32f : binary_function<short4, float, short4>
@ -474,7 +474,7 @@ struct multiply_16sc4_32f : binary_function<short4, float, short4>
        }
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(multiply_16sc4_32f)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(multiply_16sc4_32f)
    {
        enum { smart_block_dim_x = 8 };
        enum { smart_block_dim_y = 8 };
@ -483,8 +483,7 @@ DEFINE_TRANSFORM_FUNCTOR_TRAITS(multiply_16sc4_32f)

    void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream)
    {
-    transform(static_cast< DevMem2D_<short4> >(src1), src2, 
-              static_cast< DevMem2D_<short4> >(dst), multiply_16sc4_32f(), stream);
+        ::cv::gpu::device::transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), multiply_16sc4_32f(), stream);
    }

    template <typename T, typename D> struct Multiply : binary_function<T, T, D>
@ -522,7 +521,7 @@ template <typename T, typename D> void multiply_gpu(const DevMem2Db& src1, const
    {
        cudaSafeCall( cudaSetDoubleForDevice(&scale) );
        Multiply<T, D> op(scale);
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
    }

    template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
@ -618,7 +617,7 @@ template <typename T, typename D> void multiply_gpu(const DevMem2Db& src1, doubl
        cudaSafeCall( cudaSetDoubleForDevice(&val) );
        cudaSafeCall( cudaSetDoubleForDevice(&scale) );
        MultiplyScalar<T, D> op(val, scale);
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
    }

    template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
@ -690,7 +689,7 @@ struct divide_8uc4_32f : binary_function<uchar4, float, uchar4>
        }
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(divide_8uc4_32f)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(divide_8uc4_32f)
    {
        enum { smart_block_dim_x = 8 };
        enum { smart_block_dim_y = 8 };
@ -713,7 +712,7 @@ struct divide_16sc4_32f : binary_function<short4, float, short4>
        }
    };

-DEFINE_TRANSFORM_FUNCTOR_TRAITS(divide_16sc4_32f)
+    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(divide_16sc4_32f)
    {
        enum { smart_block_dim_x = 8 };
        enum { smart_block_dim_y = 8 };
@ -760,7 +759,7 @@ template <typename T, typename D> void divide_gpu(const DevMem2Db& src1, const D
    {
        cudaSafeCall( cudaSetDoubleForDevice(&scale) );
        Divide<T, D> op(scale);
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
    }

    template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
@ -856,7 +855,7 @@ template <typename T, typename D> void divide_gpu(const DevMem2Db& src1, double
        cudaSafeCall( cudaSetDoubleForDevice(&val) );
        cudaSafeCall( cudaSetDoubleForDevice(&scale) );
        DivideScalar<T, D> op(val, scale);
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
    }

    template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
@ -950,7 +949,7 @@ template <typename T, typename D> void divide_gpu(double scalar, const DevMem2Db
    {
        cudaSafeCall( cudaSetDoubleForDevice(&scalar) );
        Reciprocal<T, D> op(scalar);
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
    }

    template void divide_gpu<uchar, uchar >(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@ -1056,7 +1055,7 @@ template <> struct TransformFunctorTraits< Absdiff<float> > : DefaultTransformFu

    template <typename T> void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
    {
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
    }

    //template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@ -1102,7 +1101,7 @@ template <typename T> void absdiff_gpu(const DevMem2Db& src1, double val, const
    {
        cudaSafeCall( cudaSetDoubleForDevice(&val) );
        AbsdiffScalar<T> op(val);
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, stream);
    }

    template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
@ -1189,7 +1188,7 @@ template <> struct TransformFunctorTraits< LessEqual<float> > : DefaultTransform
    template <template <typename> class Op, typename T> void compare(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
    {
        Op<T> op;
-    OPENCV_DEVICE_NAMESPACE_ transform(static_cast< DevMem2D_<T> >(src1), static_cast< DevMem2D_<T> >(src2), dst, op, stream);
+        ::cv::gpu::device::transform(static_cast< DevMem2D_<T> >(src1), static_cast< DevMem2D_<T> >(src2), dst, op, stream);
    }

    template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
@ -1512,7 +1511,6 @@ template void bitwiseMaskXorCaller<uchar>(int, int, int, const PtrStepb, const P
    template void bitwiseMaskXorCaller<ushort>(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
    template void bitwiseMaskXorCaller<uint>(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

-
    //////////////////////////////////////////////////////////////////////////
    // min/max

@ -1548,7 +1546,7 @@ template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > :
    template <typename T>
    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
    {
-    OPENCV_DEVICE_NAMESPACE_ transform(src1, src2, dst, minimum<T>(), stream);    
+        ::cv::gpu::device::transform(src1, src2, dst, minimum<T>(), stream);    
    }

    template void min_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@ -1562,7 +1560,7 @@ template void min_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<dou
    template <typename T>
    void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
    {
-    OPENCV_DEVICE_NAMESPACE_ transform(src1, src2, dst, maximum<T>(), stream);    
+        ::cv::gpu::device::transform(src1, src2, dst, maximum<T>(), stream);    
    }

    template void max_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@ -1576,7 +1574,7 @@ template void max_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<dou
    template <typename T>
    void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
    {
-    OPENCV_DEVICE_NAMESPACE_ transform(src1, dst, device::bind2nd(minimum<T>(), src2), stream);    
+        ::cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), stream);    
    }

    template void min_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
@ -1590,7 +1588,7 @@ template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const
    template <typename T>
    void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
    {
-    OPENCV_DEVICE_NAMESPACE_ transform(src1, dst, device::bind2nd(maximum<T>(), src2), stream);    
+        ::cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), stream);    
    }

    template void max_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
@ -1601,7 +1599,6 @@ template void max_gpu<int   >(const DevMem2D_<int>& src1, int src2, const DevMem
    template void max_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
    template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);

-
    //////////////////////////////////////////////////////////////////////////
    // threshold

@ -1642,7 +1639,7 @@ void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh
        cudaStream_t stream)
    {
        Op<T> op(thresh, maxVal);
-    OPENCV_DEVICE_NAMESPACE_ transform(src, dst, op, stream);
+        ::cv::gpu::device::transform(src, dst, op, stream);
    }

    template <typename T>
@ -1672,9 +1669,6 @@ template void threshold_gpu<int>(const DevMem2Db& src, const DevMem2Db& dst, int
    template void threshold_gpu<float>(const DevMem2Db& src, const DevMem2Db& dst, float thresh, float maxVal, int type, cudaStream_t stream);
    template void threshold_gpu<double>(const DevMem2Db& src, const DevMem2Db& dst, double thresh, double maxVal, int type, cudaStream_t stream);

-
-
-
    //////////////////////////////////////////////////////////////////////////
    // pow

@ -1743,7 +1737,7 @@ template <typename T> struct TransformFunctorTraits< PowOp<T> > : detail::PowOpT
    template<typename T>
    void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream)
    {
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
    }   

    template void pow_caller<uchar>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
@ -1753,9 +1747,6 @@ template void pow_caller<ushort>(const DevMem2Db& src, float power, DevMem2Db ds
    template void pow_caller<int>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
    template void pow_caller<float>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);

-
-
-
    //////////////////////////////////////////////////////////////////////////
    // addWeighted

@ -1838,7 +1829,7 @@ void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2,

        AddWeighted<T1, T2, D> op(alpha, beta, gamma);

-    OPENCV_DEVICE_NAMESPACE_ transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), static_cast< DevMem2D_<D> >(dst), op, stream);
+        ::cv::gpu::device::transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), static_cast< DevMem2D_<D> >(dst), op, stream);
    }

    template void addWeighted_gpu<uchar, uchar, uchar>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
@ -2076,5 +2067,4 @@ template void addWeighted_gpu<double, double, short>(const DevMem2Db& src1, doub
    template void addWeighted_gpu<double, double, int>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
    template void addWeighted_gpu<double, double, float>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
    template void addWeighted_gpu<double, double, double>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/hist.cu
+++ b/modules/gpu/src/cuda/hist.cu
@ -45,8 +45,8 @@
 #include "opencv2/gpu/device/utility.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    #define UINT_BITS 32U

    //Warps == subhistograms per threadblock
@ -65,8 +65,8 @@ BEGIN_OPENCV_DEVICE_NAMESPACE

    #define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)

-namespace hist {
-
+    namespace hist 
+    {
        #if (!USE_SMEM_ATOMICS)

            #define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
@ -215,7 +215,5 @@ void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
-
    } // namespace hist
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/hog.cu
+++ b/modules/gpu/src/cuda/hog.cu
@ -42,16 +42,16 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    // Other values are not supported
    #define CELL_WIDTH 8
    #define CELL_HEIGHT 8
    #define CELLS_PER_BLOCK_X 2
    #define CELLS_PER_BLOCK_Y 2

-namespace hog {
-
+    namespace hog 
+    {
        __constant__ int cnbins;
        __constant__ int cblock_stride_x;
        __constant__ int cblock_stride_y;
@ -769,7 +769,5 @@ static void resize_for_hog(const DevMem2Db& src, DevMem2Db dst, TEX& tex)

        void resize_8UC1(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
        void resize_8UC4(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
-
    } // namespace hog 
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@ -46,10 +46,10 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace imgproc 
+    {
        /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////

        texture<uchar4, 2> tex_meanshift;
@ -1029,7 +1029,5 @@ void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHe
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
-
    } // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/internal_shared.hpp
+++ b/modules/gpu/src/cuda/internal_shared.hpp
@ -50,7 +50,7 @@
 #include "safe_call.hpp"

 #ifndef CV_PI
-#define CV_PI   3.1415926535897932384626433832795f
+#define CV_PI   3.1415926535897932384626433832795
 #endif

 #ifndef CV_PI_F
@ -61,15 +61,10 @@
  #endif
 #endif

-#define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device { 
-#define END_OPENCV_DEVICE_NAMESPACE   }}}
-#define OPENCV_DEVICE_NAMESPACE       ::cv::gpu::device
-#define OPENCV_DEVICE_NAMESPACE_      ::cv::gpu::device:: 
-
 #ifdef __CUDACC__

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    typedef unsigned char uchar;
    typedef unsigned short ushort;
    typedef signed char schar;
@ -80,8 +75,7 @@ template<class T> static inline void bindTexture(const textureReference* tex, co
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
        cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 #endif

@ -102,87 +96,6 @@ namespace cv { namespace gpu

    static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

-    /*template<class T> static inline void uploadConstant(const char* name, const T& value) 
-    { 
-        cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); 
-    }
-
-    template<class T> static inline void uploadConstant(const char* name, const T& value, cudaStream_t stream) 
-    {
-        cudaSafeCall( cudaMemcpyToSymbolAsync(name, &value, sizeof(T), 0, cudaMemcpyHostToDevice, stream) ); 
-    }   */     
-
-    //template<class T> static inline void bindTexture(const char* name, const DevMem2D_<T>& img)
-    //{            
-    //    //!!!! const_cast is disabled!
-    //    //!!!! Please use constructor of 'class texture'  instead.
-    //
-    //    //textureReference* tex; 
-    //    //cudaSafeCall( cudaGetTextureReference((const textureReference**)&tex, name) ); 
-    //    //tex->normalized = normalized;
-    //    //tex->filterMode = filterMode;
-    //    //tex->addressMode[0] = addrMode;
-    //    //tex->addressMode[1] = addrMode;
-    //    
-    //    const textureReference* tex; 
-    //    cudaSafeCall( cudaGetTextureReference(&tex, name) ); 
-    //
-    //    cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-    //    cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
-    //}
-
-    //static inline void unbindTexture(const char *name)
-    //{
-    //    const textureReference* tex; 
-    //    cudaSafeCall( cudaGetTextureReference(&tex, name) ); 
-    //    cudaSafeCall( cudaUnbindTexture(tex) );
-    //}
-
-    
-
-    //class TextureBinder
-    //{
-    //public:
-    //    TextureBinder() : tex_(0) {}
-    //    template <typename T> TextureBinder(const textureReference* tex, const DevMem2D_<T>& img) : tex_(0)
-    //    {
-    //        bind(tex, img);
-    //    }
-    //    template <typename T> TextureBinder(const char* tex_name, const DevMem2D_<T>& img) : tex_(0)
-    //    {
-    //        bind(tex_name, img);
-    //    }
-    //    ~TextureBinder() { unbind(); }
-    //
-    //    template <typename T> void bind(const textureReference* tex, const DevMem2D_<T>& img)
-    //    {
-    //        unbind();
-    //
-    //        cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-    //        cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
-    //
-    //        tex_ = tex;
-    //    }
-    //    template <typename T> void bind(const char* tex_name, const DevMem2D_<T>& img)
-    //    {
-    //        const textureReference* tex; 
-    //        cudaSafeCall( cudaGetTextureReference(&tex, tex_name) ); 
-    //        bind(tex, img);
-    //    }
-    //
-    //    void unbind()
-    //    {
-    //        if (tex_)
-    //        {
-    //            cudaUnbindTexture(tex_);
-    //            tex_ = 0;
-    //        }
-    //    }
-    //
-    //private:
-    //    const textureReference* tex_;
-    //};
-
    class NppStreamHandler
    {
    public:
--- a/modules/gpu/src/cuda/match_template.cu
+++ b/modules/gpu/src/cuda/match_template.cu
@ -43,10 +43,10 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/vec_math.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace match_template {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace match_template 
+    {
        __device__ __forceinline__ float sum(float v) { return v; }
        __device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
        __device__ __forceinline__ float sum(float3 v) { return v.x + v.y + v.z; }
@ -905,7 +905,5 @@ void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cu
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
-
    } //namespace match_template
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/mathfunc.cu
+++ b/modules/gpu/src/cuda/mathfunc.cu
@ -42,10 +42,10 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace mathfunc {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace mathfunc 
+    {
        //////////////////////////////////////////////////////////////////////////////////////
        // Cart <-> Polar

@ -209,7 +209,5 @@ void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, b

            callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
        }
-
    } // namespace mathfunc
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/matrix_operations.cu
+++ b/modules/gpu/src/cuda/matrix_operations.cu
@ -45,8 +45,8 @@
 #include "opencv2/gpu/device/transform.hpp"
 #include "opencv2/gpu/device/functional.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T> struct shift_and_sizeof;
    template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
    template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
@ -303,7 +303,7 @@ void cvt_(const DevMem2Db& src, const DevMem2Db& dst, double alpha, double beta,
        cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
        cudaSafeCall( cudaSetDoubleForDevice(&beta) );
        Convertor<T, D> op(alpha, beta);
-    OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
+        ::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
    }

    void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, 
@ -344,5 +344,4 @@ void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int dde

        func(src, dst, alpha, beta, stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@ -46,10 +46,10 @@
 #include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/transform.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace matrix_reductions {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace matrix_reductions 
+    {
        // Performs reduction in shared memory
        template <int size, typename T>
        __device__ void sumInSmem(volatile T* data, const uint tid)
@ -2082,7 +2082,5 @@ template void reduceCols_gpu<int, int, int>(const DevMem2Db& src, int cn, const
        template void reduceCols_gpu<int, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);

        template void reduceCols_gpu<float, float, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
-
    } // namespace matrix_reductions
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/pyr_down.cu
+++ b/modules/gpu/src/cuda/pyr_down.cu
@ -46,10 +46,10 @@
 #include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace imgproc 
+    {
        template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
@ -181,7 +181,5 @@ template void pyrDown_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst,
        template void pyrDown_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
        template void pyrDown_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
        template void pyrDown_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
-
    } // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/pyr_up.cu
+++ b/modules/gpu/src/cuda/pyr_up.cu
@ -46,10 +46,10 @@
 #include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace imgproc 
+    {
        template <typename T, typename B> __global__ void pyrUp(const PtrStep<T> src, DevMem2D_<T> dst, const B b)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
@ -176,7 +176,5 @@ template void pyrUp_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, in
        template void pyrUp_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
        template void pyrUp_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
        template void pyrUp_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
-
    } // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/remap.cu
+++ b/modules/gpu/src/cuda/remap.cu
@ -47,10 +47,10 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/filters.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-    
+namespace cv { namespace gpu { namespace device 
+{
+    namespace imgproc 
+    {    
        template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
@ -248,7 +248,5 @@ template void remap_gpu<float >(const DevMem2Db& src, const DevMem2Df& xmap, con
        //template void remap_gpu<float2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<float3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<float4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-
    } // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/resize.cu
+++ b/modules/gpu/src/cuda/resize.cu
@ -47,10 +47,10 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/filters.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace imgproc {
-    
+namespace cv { namespace gpu { namespace device 
+{
+    namespace imgproc 
+    {    
        template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
@ -259,7 +259,5 @@ template void resize_gpu<float >(const DevMem2Db& src, float fx, float fy, const
        //template void resize_gpu<float2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<float3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<float4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-
    } // namespace imgproc
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/row_filter.cu
+++ b/modules/gpu/src/cuda/row_filter.cu
@ -47,16 +47,16 @@
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    #define MAX_KERNEL_SIZE 16
    #define BLOCK_DIM_X 16
    #define BLOCK_DIM_Y 4
    #define RESULT_STEPS 8
    #define HALO_STEPS 1

-namespace row_filter {
-
+    namespace row_filter 
+    {
        __constant__ float c_kernel[MAX_KERNEL_SIZE];

        void loadKernel(const float kernel[], int ksize)
@ -266,7 +266,5 @@ template void linearRowFilter_gpu<uchar4, float4>(const DevMem2Db& src, const De
        template void linearRowFilter_gpu<short3, float3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
        template void linearRowFilter_gpu<int   , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
        template void linearRowFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
-
    } // namespace row_filter
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/safe_call.hpp
+++ b/modules/gpu/src/cuda/safe_call.hpp
@ -62,8 +62,8 @@
    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__)
 #endif

-namespace cv { namespace gpu {
-
+namespace cv { namespace gpu 
+{
    void error(const char *error_string, const char *file, const int line, const char *func = "");
    void nppError(int err, const char *file, const int line, const char *func = "");
    void ncvError(int err, const char *file, const int line, const char *func = "");
@ -99,7 +99,6 @@ static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const
        if (CUBLAS_STATUS_SUCCESS != err)
            cv::gpu::cublasError(err, file, line, func);
    }
-
 }}

 #endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
--- a/modules/gpu/src/cuda/split_merge.cu
+++ b/modules/gpu/src/cuda/split_merge.cu
@ -42,10 +42,10 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace split_merge {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace split_merge 
+    {
        template <typename T, size_t elem_size = sizeof(T)>
        struct TypeTraits 
        {
@ -502,7 +502,5 @@ void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t

            split_func(src, dst, stream);
        }
-
    } // namespace split_merge
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/stereobm.cu
+++ b/modules/gpu/src/cuda/stereobm.cu
@ -42,10 +42,10 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace stereobm {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace stereobm 
+    {
        //////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
        //////////////////////////////////////////////////////////////////////////////////////////////////
@ -531,7 +531,5 @@ void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturen

            cudaSafeCall( cudaUnbindTexture (texForTF) );
        }
-
    } // namespace stereobm
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/stereobp.cu
+++ b/modules/gpu/src/cuda/stereobp.cu
@ -44,10 +44,10 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/limits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace stereobp {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace stereobp 
+    {
        ///////////////////////////////////////////////////////////////
        /////////////////////// load constants ////////////////////////
        ///////////////////////////////////////////////////////////////
@ -526,7 +526,5 @@ void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, cons

        template void output_gpu<short>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
        template void output_gpu<float>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
-
    } // namespace stereobp
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cuda/stereocsbp.cu
+++ b/modules/gpu/src/cuda/stereocsbp.cu
@ -44,10 +44,10 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/limits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace stereocsbp {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace stereocsbp 
+    {
        ///////////////////////////////////////////////////////////////
        /////////////////////// load constants ////////////////////////
        ///////////////////////////////////////////////////////////////
@ -889,7 +889,5 @@ template void compute_disp(const short* u, const short* d, const short* l, const

        template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
            const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
-
    } // namespace stereocsbp
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device {
--- a/modules/gpu/src/cuda/surf.cu
+++ b/modules/gpu/src/cuda/surf.cu
@ -52,10 +52,10 @@
 #include "opencv2/gpu/device/functional.hpp"
 #include "opencv2/gpu/device/filters.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace surf {
-
+namespace cv { namespace gpu { namespace device 
+{
+    namespace surf 
+    {
        ////////////////////////////////////////////////////////////////////////
        // Global parameters

@ -996,7 +996,5 @@ void compute_descriptors_gpu(const DevMem2Df& descriptors,
                cudaSafeCall( cudaDeviceSynchronize() );
            }
        }
-
    } // namespace surf
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
--- a/modules/gpu/src/cudastream.cpp
+++ b/modules/gpu/src/cudastream.cpp
@ -71,8 +71,8 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }

 #include "opencv2/gpu/stream_accessor.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0);

    template <typename T>
@ -81,10 +81,9 @@ template <typename T>
    void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);

    void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE;
+using namespace ::cv::gpu::device;

 struct Stream::Impl
 {
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@ -123,19 +123,18 @@ namespace
 ////////////////////////////////////////////////////////////////////////
 // add

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T, typename D> 
    void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

    template <typename T, typename D> 
    void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

@ -174,7 +173,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu

 void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

@ -236,19 +235,18 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
 ////////////////////////////////////////////////////////////////////////
 // subtract

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T, typename D> 
    void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

    template <typename T, typename D> 
    void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

@ -287,7 +285,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons

 void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);

@ -349,8 +347,8 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
 ////////////////////////////////////////////////////////////////////////
 // multiply

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
    void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);

@ -359,12 +357,11 @@ void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db&

    template <typename T, typename D> 
    void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);

@ -422,7 +419,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub

 void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);

@ -472,8 +469,8 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
 ////////////////////////////////////////////////////////////////////////
 // divide

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
    void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);

@ -485,12 +482,11 @@ void divide_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double

    template <typename T, typename D> 
    void divide_gpu(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);

@ -548,7 +544,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double

 void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);

@ -597,7 +593,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc

 void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);

@ -630,19 +626,18 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
 //////////////////////////////////////////////////////////////////////////////
 // absdiff

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T>
    void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);

    template <typename T> 
    void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);

@ -714,7 +709,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea

 void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);

@ -758,18 +753,17 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
 //////////////////////////////////////////////////////////////////////////////
 // Comparison of two matrixes

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
    template <typename T> void compare_ne(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
    template <typename T> void compare_lt(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
    template <typename T> void compare_le(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);

@ -835,14 +829,13 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
 //////////////////////////////////////////////////////////////////////////////
 // Unary bitwise logical operations

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src, PtrStepb dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStepb src, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
@ -850,13 +843,13 @@ namespace
    {
        dst.create(src.size(), src.type());

-        OPENCV_DEVICE_NAMESPACE_ bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
+        ::cv::gpu::device::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
    }


    void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE;
+        using namespace ::cv::gpu::device;

        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

@ -893,8 +886,8 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
 //////////////////////////////////////////////////////////////////////////////
 // Binary bitwise logical operations

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);

    template <typename T>
@ -909,8 +902,7 @@ void bitwiseXorCaller(int rows, int cols, size_t elem_size1, int cn, const PtrSt

    template <typename T>
    void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
@ -919,12 +911,12 @@ namespace
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

-        OPENCV_DEVICE_NAMESPACE_ bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        ::cv::gpu::device::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE;
+        using namespace ::cv::gpu::device;

        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

@ -952,13 +944,13 @@ namespace
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

-        OPENCV_DEVICE_NAMESPACE_ bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        ::cv::gpu::device::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }


    void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE;
+        using namespace ::cv::gpu::device;

        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

@ -986,13 +978,13 @@ namespace
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

-        OPENCV_DEVICE_NAMESPACE_ bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        ::cv::gpu::device::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }


    void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE;
+        using namespace ::cv::gpu::device;

        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);

@ -1046,8 +1038,8 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
 //////////////////////////////////////////////////////////////////////////////
 // Minimum and maximum operations

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T>
    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);

@ -1059,8 +1051,7 @@ void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStre

    template <typename T>
    void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
@ -1069,14 +1060,14 @@ namespace
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());
-        OPENCV_DEVICE_NAMESPACE_ min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
+        ::cv::gpu::device::min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
    }

    template <typename T>
    void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
    {
        dst.create(src1.size(), src1.type());
-        OPENCV_DEVICE_NAMESPACE_ min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
+        ::cv::gpu::device::min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
    }
    
    template <typename T>
@ -1084,14 +1075,14 @@ namespace
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());
-        OPENCV_DEVICE_NAMESPACE_ max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
+        ::cv::gpu::device::max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
    }

    template <typename T>
    void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
    {
        dst.create(src1.size(), src1.type());
-        OPENCV_DEVICE_NAMESPACE_ max_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
+        ::cv::gpu::device::max_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
    }
 }

@ -1155,18 +1146,17 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // threshold

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T>
    void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
    template <typename T> void threshold_caller(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream)
    {
-        OPENCV_DEVICE_NAMESPACE_ threshold_gpu<T>(src, dst, saturate_cast<T>(thresh), saturate_cast<T>(maxVal), type, stream);
+        ::cv::gpu::device::threshold_gpu<T>(src, dst, saturate_cast<T>(thresh), saturate_cast<T>(maxVal), type, stream);
    }
 }

@ -1223,16 +1213,15 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
 ////////////////////////////////////////////////////////////////////////
 // pow

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template<typename T>
    void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    CV_Assert(src.depth() != CV_64F);
    dst.create(src.size(), src.type());
@ -1252,16 +1241,15 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // addWeighted

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T1, typename T2, typename D>
    void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE;
+    using namespace ::cv::gpu::device;

    CV_Assert(src1.size() == src2.size());
    CV_Assert(src1.type() == src2.type() || (dtype >= 0 && src1.channels() == src2.channels()));
--- a/modules/gpu/src/filtering.cpp
+++ b/modules/gpu/src/filtering.cpp
@ -735,8 +735,8 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Separable Linear Filter

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace row_filter
    {
        template <typename T, typename D>
@ -748,8 +748,7 @@ namespace column_filter
        template <typename T, typename D>
        void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
@ -803,7 +802,7 @@ namespace

 Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ row_filter;
+    using namespace ::cv::gpu::device::row_filter;

    static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
    
@ -918,7 +917,7 @@ namespace

 Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ column_filter;
+    using namespace ::cv::gpu::device::column_filter;

    static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};
    
--- a/modules/gpu/src/hog.cpp
+++ b/modules/gpu/src/hog.cpp
@ -60,8 +60,8 @@ std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_nog

 #else

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace hog 
    {
        void set_up_constants(int nbins, int block_stride_x, int block_stride_y, 
@ -94,10 +94,9 @@ namespace hog
        void resize_8UC1(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
        void resize_8UC4(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE;
+using namespace ::cv::gpu::device;
    
 cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, Size cell_size, 
 									  int nbins, double win_sigma, double threshold_L2hys, bool gamma_correction, int nlevels)
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@ -107,20 +107,19 @@ void cv::gpu::CannyBuf::release() { throw_nogpu(); }
 ////////////////////////////////////////////////////////////////////////
 // remap

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        template <typename T> 
        void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, 
                       int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, 
        int borderMode, const float* borderValue, cudaStream_t stream, int cc);
@ -160,18 +159,17 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
 ////////////////////////////////////////////////////////////////////////
 // meanShiftFiltering_GPU

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    if( src.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );
@ -197,18 +195,17 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
 ////////////////////////////////////////////////////////////////////////
 // meanShiftProc_GPU

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    if( src.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );
@ -235,22 +232,21 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
 ////////////////////////////////////////////////////////////////////////
 // drawColorDisp

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
        void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
    template <typename T>
    void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+        using namespace ::cv::gpu::device::imgproc;

        dst.create(src.size(), CV_8UC4);

@ -272,22 +268,21 @@ void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& s
 ////////////////////////////////////////////////////////////////////////
 // reprojectImageTo3D

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void reprojectImageTo3D_gpu(const DevMem2Db& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
        void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
    template <typename T>
    void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+        using namespace ::cv::gpu::device::imgproc;

        xyzw.create(disp.rows, disp.cols, CV_32FC4);

@ -309,14 +304,13 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
 ////////////////////////////////////////////////////////////////////////
 // resize

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
 {
@ -380,7 +374,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
    }
    else
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+        using namespace ::cv::gpu::device::imgproc;

        typedef void (*caller_t)(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
        static const caller_t callers[6][4] = 
@ -400,20 +394,19 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
 ////////////////////////////////////////////////////////////////////////
 // copyMakeBorder

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
    template <typename T, int cn> void copyMakeBorder_caller(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+        using namespace ::cv::gpu::device::imgproc;

        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));

@ -666,21 +659,20 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpPlaneMaps

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
                                const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
                                cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, 
                                 float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
@ -700,21 +692,20 @@ void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, cons
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpCylindricalMaps

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
                                      const float k_rinv[9], const float r_kinv[9], float scale,
                                      cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
                                       GpuMat& map_x, GpuMat& map_y, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
@ -733,21 +724,20 @@ void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpSphericalMaps

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
                                    const float k_rinv[9], const float r_kinv[9], float scale,
                                    cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
                                     GpuMat& map_x, GpuMat& map_y, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
@ -899,18 +889,17 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
 //////////////////////////////////////////////////////////////////////////////
 // columnSum

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc
    {
        void columnSum_32F(const DevMem2Db src, const DevMem2Db dst);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(src.type() == CV_32F);

@ -1245,8 +1234,8 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4
    hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
 }

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace hist
    {
        void histogram256_gpu(DevMem2Db src, int* hist, unsigned int* buf, cudaStream_t stream);
@ -1256,8 +1245,7 @@ namespace hist

        void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
 {
@ -1267,7 +1255,7 @@ void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)

 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ hist;
+    using namespace ::cv::gpu::device::hist;

    CV_Assert(src.type() == CV_8UC1);

@ -1293,7 +1281,7 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream&

 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ hist;
+    using namespace ::cv::gpu::device::hist;

    CV_Assert(src.type() == CV_8UC1);

@ -1327,16 +1315,15 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat&
 ////////////////////////////////////////////////////////////////////////
 // cornerHarris & cornerMinEigenVal

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst, cudaStream_t stream);
        void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
        void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace 
 {
@ -1421,7 +1408,7 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& D

 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(borderType == cv::BORDER_REFLECT101 ||
              borderType == cv::BORDER_REPLICATE);
@ -1448,7 +1435,7 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM

 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
 {  
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    CV_Assert(borderType == cv::BORDER_REFLECT101 ||
              borderType == cv::BORDER_REPLICATE);
@ -1464,20 +1451,19 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM
 //////////////////////////////////////////////////////////////////////////////
 // mulSpectrums

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);

        void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream) 
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, DevMem2D_<cufftComplex>, cudaStream_t stream);

@ -1495,20 +1481,19 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag
 //////////////////////////////////////////////////////////////////////////////
 // mulAndScaleSpectrums

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);

        void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream) 
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, DevMem2D_<cufftComplex>, cudaStream_t stream);
    static Caller callers[] = { mulAndScaleSpectrums, mulAndScaleSpectrums_CONJ };
@ -1673,18 +1658,17 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
    convolve(image, templ, result, ccorr, buf);
 }

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc
    {
        void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

 #ifndef HAVE_CUFFT

@ -1811,18 +1795,17 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
 //////////////////////////////////////////////////////////////////////////////
 // pyrDown

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

@ -1851,18 +1834,17 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& st
 //////////////////////////////////////////////////////////////////////////////
 // pyrUp

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace imgproc 
    {
        template <typename T, int cn> void pyrUp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
+    using namespace ::cv::gpu::device::imgproc;

    typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);

@ -1933,8 +1915,8 @@ void cv::gpu::CannyBuf::release()
    trackBuf2.release();
 }

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace canny 
    {
        void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);
@ -1950,14 +1932,13 @@ namespace canny

        void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
    void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ canny;
+        using namespace ::cv::gpu::device::canny;

        calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
        
@ -1977,7 +1958,7 @@ void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double hi

 void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ canny;
+    using namespace ::cv::gpu::device::canny;

    CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
    CV_Assert(src.type() == CV_8UC1);
@ -2016,7 +1997,7 @@ void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_

 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ canny;
+    using namespace ::cv::gpu::device::canny;

    CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
    CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
--- a/modules/gpu/src/initialization.cpp
+++ b/modules/gpu/src/initialization.cpp
@ -274,8 +274,8 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory)
 ////////////////////////////////////////////////////////////////////
 // GpuFuncTable

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t& stream = 0);

    template <typename T>
@ -284,8 +284,7 @@ template <typename T>
    void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);

    void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
@ -345,7 +344,7 @@ namespace

    void convertToKernelCaller(const GpuMat& src, GpuMat& dst)
    {
-        OPENCV_DEVICE_NAMESPACE_ convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);
+        ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);
    }

    //////////////////////////////////////////////////////////////////////////
@ -403,7 +402,7 @@ namespace
    void kernelSet(GpuMat& src, Scalar s)
    {
        Scalar_<T> sf = s;
-        OPENCV_DEVICE_NAMESPACE_ set_to_gpu(src, sf.val, src.channels(), 0);
+        ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), 0);
    }

    template<int SDEPTH, int SCN> struct NppSetMaskFunc
@ -458,7 +457,7 @@ namespace
    void kernelSetMask(GpuMat& src, Scalar s, const GpuMat& mask)
    {
        Scalar_<T> sf = s;
-        OPENCV_DEVICE_NAMESPACE_ set_to_gpu(src, sf.val, mask, src.channels(), 0);
+        ::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), 0);
    }

    class CudaFuncTable : public GpuFuncTable
@ -479,7 +478,7 @@ namespace

        void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const 
        { 
-            OPENCV_DEVICE_NAMESPACE_ copy_to_with_mask(src, dst, src.depth(), mask, src.channels());
+            ::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels());
        }

        void convert(const GpuMat& src, GpuMat& dst) const 
@ -560,7 +559,7 @@ namespace

        void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const 
        { 
-            device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta);
+            ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta);
        }

        void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
--- a/modules/gpu/src/match_template.cpp
+++ b/modules/gpu/src/match_template.cpp
@ -52,8 +52,8 @@ void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&)

 #else

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace match_template 
    {
        void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
@ -136,10 +136,9 @@ namespace match_template

        void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cudaStream_t stream);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ match_template;
+using namespace ::cv::gpu::device::match_template;

 namespace 
 {
--- a/modules/gpu/src/matrix_reductions.cpp
+++ b/modules/gpu/src/matrix_reductions.cpp
@ -190,8 +190,8 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
 ////////////////////////////////////////////////////////////////////////
 // Sum

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace matrix_reductions 
    {
        namespace sum
@ -217,8 +217,7 @@ namespace matrix_reductions
            void getBufSizeRequired(int cols, int rows, int cn, int& bufcols, int& bufrows);
        }
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 Scalar cv::gpu::sum(const GpuMat& src) 
 {
@ -229,7 +228,7 @@ Scalar cv::gpu::sum(const GpuMat& src)

 Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf) 
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
+    using namespace ::cv::gpu::device::matrix_reductions::sum;

    typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);

@ -272,7 +271,7 @@ Scalar cv::gpu::absSum(const GpuMat& src)

 Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf) 
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
+    using namespace ::cv::gpu::device::matrix_reductions::sum;

    typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);

@ -316,7 +315,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src)

 Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf) 
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
+    using namespace ::cv::gpu::device::matrix_reductions::sum;

    typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);

@ -353,8 +352,8 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
 ////////////////////////////////////////////////////////////////////////
 // Find min or max

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace matrix_reductions 
    {
        namespace minmax 
@ -374,8 +373,7 @@ namespace matrix_reductions
            void minMaxMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
        }
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}


 void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask)
@ -387,7 +385,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp

 void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::minmax;
+    using namespace ::cv::gpu::device::matrix_reductions::minmax;

    typedef void (*Caller)(const DevMem2Db, double*, double*, PtrStepb);
    typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
@ -457,8 +455,8 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
 ////////////////////////////////////////////////////////////////////////
 // Locate min and max

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace matrix_reductions 
    {
        namespace minmaxloc 
@ -483,8 +481,7 @@ namespace matrix_reductions
                                              int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
        }
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
 {    
@ -495,7 +492,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::minmaxloc;
+    using namespace ::cv::gpu::device::matrix_reductions::minmaxloc;

    typedef void (*Caller)(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
    typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
@ -571,8 +568,8 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
 //////////////////////////////////////////////////////////////////////////////
 // Count non-zero elements

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace matrix_reductions 
    {
        namespace countnonzero 
@ -586,8 +583,7 @@ namespace matrix_reductions
            int countNonZeroMultipassCaller(const DevMem2Db src, PtrStepb buf);
        }
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 int cv::gpu::countNonZero(const GpuMat& src)
 {
@ -598,7 +594,7 @@ int cv::gpu::countNonZero(const GpuMat& src)

 int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::countnonzero;
+    using namespace ::cv::gpu::device::matrix_reductions::countnonzero;

    typedef int (*Caller)(const DevMem2Db src, PtrStepb buf);

@ -632,19 +628,19 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)

 //////////////////////////////////////////////////////////////////////////////
 // reduce
-BEGIN_OPENCV_DEVICE_NAMESPACE

+namespace cv { namespace gpu { namespace device 
+{
    namespace matrix_reductions 
    {
        template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
        template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream)
 {
-    using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions;
+    using namespace ::cv::gpu::device::matrix_reductions;

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4 && dtype <= CV_32F);
    CV_Assert(dim == 0 || dim == 1);
--- a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
@ -48,8 +48,8 @@
 #include "vec_traits.hpp"
 #include "vec_math.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    //////////////////////////////////////////////////////////////
    // BrdConstant

@ -506,7 +506,7 @@ template <typename D> struct BrdReflect

        __device__ __forceinline__ int idx_col_high(int x) const 
        {
-        return /*::abs*/(last_col - ::abs(last_col - x) + (x > last_col)) /*% (last_col + 1)*/;
+            return (last_col - ::abs(last_col - x) + (x > last_col));
        }

        __device__ __forceinline__ int idx_col(int x) const
@ -710,7 +710,6 @@ template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D>
        const int width;
        const D val;
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/color.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/color.hpp
@ -46,8 +46,8 @@
 #include "internal_shared.hpp"
 #include "detail/color_detail.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    // All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implement
    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
    // {
@ -217,7 +217,6 @@ OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_COLOR_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp
@ -45,16 +45,8 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-#if defined(_WIN64) || defined(__LP64__)		
-    // 64-bit register modifier for inlined asm
-    #define OPENCV_GPU_ASM_PTR "l"
-#else	
-    // 32-bit register modifier for inlined asm
-    #define OPENCV_GPU_ASM_PTR "r"
-#endif
-
+namespace cv { namespace gpu { namespace device 
+{
    #if __CUDA_ARCH__ >= 200

        // for Fermi memory space is detected automatically
@ -65,6 +57,14 @@ BEGIN_OPENCV_DEVICE_NAMESPACE
            
    #else // __CUDA_ARCH__ >= 200        

+        #if defined(_WIN64) || defined(__LP64__)		
+            // 64-bit register modifier for inlined asm
+            #define OPENCV_GPU_ASM_PTR "l"
+        #else	
+            // 32-bit register modifier for inlined asm
+            #define OPENCV_GPU_ASM_PTR "r"
+        #endif
+
        template<class T> struct ForceGlob;

        #define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
@ -97,9 +97,9 @@ BEGIN_OPENCV_DEVICE_NAMESPACE

        #undef OPENCV_GPU_DEFINE_FORCE_GLOB
        #undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
+        #undef OPENCV_GPU_ASM_PTR
        
    #endif // __CUDA_ARCH__ >= 200
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/detail/color_detail.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/detail/color_detail.hpp
@ -49,13 +49,13 @@
 #include "../limits.hpp"
 #include "../functional.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device
+{
    #ifndef CV_DESCALE
        #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
    #endif

-namespace detail
+    namespace color_detail
    {
        template<typename T> struct ColorChannel
        {
@ -99,7 +99,7 @@ namespace detail

 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////

-namespace detail
+    namespace color_detail
    {
        template <typename T, int scn, int dcn, int bidx> struct RGB2RGB : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>
        {
@ -135,7 +135,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -144,7 +144,7 @@ namespace detail

 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////

-namespace detail
+    namespace color_detail
    {
        template <int green_bits, int bidx> struct RGB2RGB5x5Converter;
        template<int bidx> struct RGB2RGB5x5Converter<6, bidx> 
@ -197,14 +197,14 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(name, scn, bidx, green_bits) \
    struct name ## _traits \
    { \
-        typedef detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };

-namespace detail
+    namespace color_detail
    {
        template <int green_bits, int bidx> struct RGB5x52RGBConverter;    
        template <int bidx> struct RGB5x52RGBConverter<5, bidx>
@ -267,7 +267,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(name, dcn, bidx, green_bits) \
    struct name ## _traits \
    { \
-        typedef detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -276,7 +276,7 @@ namespace detail

 ///////////////////////////////// Grayscale to Color ////////////////////////////////

-namespace detail
+    namespace color_detail
    {
        template <typename T, int dcn> struct Gray2RGB : unary_function<T, typename TypeVec<T, dcn>::vec_type>
        {
@ -308,14 +308,14 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(name, dcn) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::Gray2RGB<T, dcn> functor_type; \
+        typedef ::cv::gpu::device::color_detail::Gray2RGB<T, dcn> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };

-namespace detail
+    namespace color_detail
    {
        template <int green_bits> struct Gray2RGB5x5Converter;
        template<> struct Gray2RGB5x5Converter<6> 
@ -346,7 +346,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(name, green_bits) \
    struct name ## _traits \
    { \
-        typedef detail::Gray2RGB5x5<green_bits> functor_type; \
+        typedef ::cv::gpu::device::color_detail::Gray2RGB5x5<green_bits> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -355,7 +355,7 @@ namespace detail

 ///////////////////////////////// Color to Grayscale ////////////////////////////////

-namespace detail
+    namespace color_detail
    {
        template <int green_bits> struct RGB5x52GrayConverter;
        template <> struct RGB5x52GrayConverter<6> 
@ -385,14 +385,14 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(name, green_bits) \
    struct name ## _traits \
    { \
-        typedef detail::RGB5x52Gray<green_bits> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB5x52Gray<green_bits> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };

-namespace detail
+    namespace color_detail
    {
        template <int bidx, typename T> static __device__ __forceinline__ T RGB2GrayConvert(const T* src)
        {
@ -429,7 +429,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(name, scn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::RGB2Gray<T, scn, bidx> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2Gray<T, scn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -438,7 +438,7 @@ namespace detail

 ///////////////////////////////////// RGB <-> YUV //////////////////////////////////////

-namespace detail
+    namespace color_detail
    {
        __constant__ float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
        __constant__ int   c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
@ -499,14 +499,14 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };

-namespace detail
+    namespace color_detail
    {
        __constant__ float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
        __constant__ int   c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 }; 
@ -570,7 +570,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::gpu::device::color_detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -579,7 +579,7 @@ namespace detail

 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
    
-namespace detail
+    namespace color_detail
    {
        __constant__ float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
        __constant__ int   c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
@ -640,14 +640,14 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };

-namespace detail
+    namespace color_detail
    {
        __constant__ float c_YCrCb2RGBCoeffs_f[5] = {1.403f, -0.714f, -0.344f, 1.773f};
        __constant__ int   c_YCrCb2RGBCoeffs_i[5] = {22987, -11698, -5636, 29049};
@ -711,7 +711,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::gpu::device::color_detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -720,7 +720,7 @@ namespace detail

 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////

-namespace detail
+    namespace color_detail
    {
        __constant__ float c_RGB2XYZ_D65f[9] = { 0.412453f, 0.357580f, 0.180423f, 0.212671f, 0.715160f, 0.072169f, 0.019334f, 0.119193f, 0.950227f };
        __constant__ int   c_RGB2XYZ_D65i[9] = { 1689, 1465, 739, 871, 2929, 296, 79, 488, 3892 };
@ -779,14 +779,14 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };

-namespace detail
+    namespace color_detail
    {
        __constant__ float c_XYZ2sRGB_D65f[9] = { 3.240479f, -1.53715f, -0.498535f, -0.969256f, 1.875991f, 0.041556f, 0.055648f, -0.204043f, 1.057311f };
        __constant__ int   c_XYZ2sRGB_D65i[9] = { 13273, -6296, -2042, -3970, 7684, 170, 228, -836, 4331 };
@ -846,7 +846,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::gpu::device::color_detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -855,7 +855,7 @@ namespace detail

 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////

-namespace detail
+    namespace color_detail
    {
        __constant__ int c_HsvDivTable   [256] = {0, 1044480, 522240, 348160, 261120, 208896, 174080, 149211, 130560, 116053, 104448, 94953, 87040, 80345, 74606, 69632, 65280, 61440, 58027, 54973, 52224, 49737, 47476, 45412, 43520, 41779, 40172, 38684, 37303, 36017, 34816, 33693, 32640, 31651, 30720, 29842, 29013, 28229, 27486, 26782, 26112, 25475, 24869, 24290, 23738, 23211, 22706, 22223, 21760, 21316, 20890, 20480, 20086, 19707, 19342, 18991, 18651, 18324, 18008, 17703, 17408, 17123, 16846, 16579, 16320, 16069, 15825, 15589, 15360, 15137, 14921, 14711, 14507, 14308, 14115, 13926, 13743, 13565, 13391, 13221, 13056, 12895, 12738, 12584, 12434, 12288, 12145, 12006, 11869, 11736, 11605, 11478, 11353, 11231, 11111, 10995, 10880, 10768, 10658, 10550, 10445, 10341, 10240, 10141, 10043, 9947, 9854, 9761, 9671, 9582, 9495, 9410, 9326, 9243, 9162, 9082, 9004, 8927, 8852, 8777, 8704, 8632, 8561, 8492, 8423, 8356, 8290, 8224, 8160, 8097, 8034, 7973, 7913, 7853, 7795, 7737, 7680, 7624, 7569, 7514, 7461, 7408, 7355, 7304, 7253, 7203, 7154, 7105, 7057, 7010, 6963, 6917, 6872, 6827, 6782, 6739, 6695, 6653, 6611, 6569, 6528, 6487, 6447, 6408, 6369, 6330, 6292, 6254, 6217, 6180, 6144, 6108, 6073, 6037, 6003, 5968, 5935, 5901, 5868, 5835, 5803, 5771, 5739, 5708, 5677, 5646, 5615, 5585, 5556, 5526, 5497, 5468, 5440, 5412, 5384, 5356, 5329, 5302, 5275, 5249, 5222, 5196, 5171, 5145, 5120, 5095, 5070, 5046, 5022, 4998, 4974, 4950, 4927, 4904, 4881, 4858, 4836, 4813, 4791, 4769, 4748, 4726, 4705, 4684, 4663, 4642, 4622, 4601, 4581, 4561, 4541, 4522, 4502, 4483, 4464, 4445, 4426, 4407, 4389, 4370, 4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229, 4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096};
        __constant__ int c_HsvDivTable180[256] = {0, 122880, 61440, 40960, 30720, 24576, 20480, 17554, 15360, 13653, 12288, 11171, 10240, 9452, 8777, 8192, 7680, 7228, 6827, 6467, 6144, 5851, 5585, 5343, 5120, 4915, 4726, 4551, 4389, 4237, 4096, 3964, 3840, 3724, 3614, 3511, 3413, 3321, 3234, 3151, 3072, 2997, 2926, 2858, 2793, 2731, 2671, 2614, 2560, 2508, 2458, 2409, 2363, 2318, 2276, 2234, 2194, 2156, 2119, 2083, 2048, 2014, 1982, 1950, 1920, 1890, 1862, 1834, 1807, 1781, 1755, 1731, 1707, 1683, 1661, 1638, 1617, 1596, 1575, 1555, 1536, 1517, 1499, 1480, 1463, 1446, 1429, 1412, 1396, 1381, 1365, 1350, 1336, 1321, 1307, 1293, 1280, 1267, 1254, 1241, 1229, 1217, 1205, 1193, 1182, 1170, 1159, 1148, 1138, 1127, 1117, 1107, 1097, 1087, 1078, 1069, 1059, 1050, 1041, 1033, 1024, 1016, 1007, 999, 991, 983, 975, 968, 960, 953, 945, 938, 931, 924, 917, 910, 904, 897, 890, 884, 878, 871, 865, 859, 853, 847, 842, 836, 830, 825, 819, 814, 808, 803, 798, 793, 788, 783, 778, 773, 768, 763, 759, 754, 749, 745, 740, 736, 731, 727, 723, 719, 714, 710, 706, 702, 698, 694, 690, 686, 683, 679, 675, 671, 668, 664, 661, 657, 654, 650, 647, 643, 640, 637, 633, 630, 627, 624, 621, 617, 614, 611, 608, 605, 602, 599, 597, 594, 591, 588, 585, 582, 580, 577, 574, 572, 569, 566, 564, 561, 559, 556, 554, 551, 549, 546, 544, 541, 539, 537, 534, 532, 530, 527, 525, 523, 521, 518, 516, 514, 512, 510, 508, 506, 504, 502, 500, 497, 495, 493, 492, 490, 488, 486, 484, 482};
@ -976,7 +976,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -984,7 +984,7 @@ namespace detail
    }; \
    template <typename T> struct name ## _full_traits \
    { \
-        typedef detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -992,7 +992,7 @@ namespace detail
    }; \
    template <> struct name ## _traits<float> \
    { \
-        typedef detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1000,14 +1000,14 @@ namespace detail
    }; \
    template <> struct name ## _full_traits<float> \
    { \
-        typedef detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };

-namespace detail
+    namespace color_detail
    {
        __constant__ int c_HsvSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} };

@ -1102,7 +1102,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::gpu::device::color_detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1110,7 +1110,7 @@ namespace detail
    }; \
    template <typename T> struct name ## _full_traits \
    { \
-        typedef detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
+        typedef ::cv::gpu::device::color_detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1118,7 +1118,7 @@ namespace detail
    }; \
    template <> struct name ## _traits<float> \
    { \
-        typedef detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::gpu::device::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1126,7 +1126,7 @@ namespace detail
    }; \
    template <> struct name ## _full_traits<float> \
    { \
-        typedef detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::gpu::device::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1135,7 +1135,7 @@ namespace detail

 /////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////

-namespace detail
+    namespace color_detail
    {
        template <int bidx, int hr, typename D> static __device__ void RGB2HLSConvert(const float* src, D& dst)
        {
@ -1227,7 +1227,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1235,7 +1235,7 @@ namespace detail
    }; \
    template <typename T> struct name ## _full_traits \
    { \
-        typedef detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1243,7 +1243,7 @@ namespace detail
    }; \
    template <> struct name ## _traits<float> \
    { \
-        typedef detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1251,14 +1251,14 @@ namespace detail
    }; \
    template <> struct name ## _full_traits<float> \
    { \
-        typedef detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::gpu::device::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };

-namespace detail
+    namespace color_detail
    {
        __constant__ int c_HlsSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} };

@ -1359,7 +1359,7 @@ namespace detail
 #define OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(name, scn, dcn, bidx) \
    template <typename T> struct name ## _traits \
    { \
-        typedef detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::gpu::device::color_detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1367,7 +1367,7 @@ namespace detail
    }; \
    template <typename T> struct name ## _full_traits \
    { \
-        typedef detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
+        typedef ::cv::gpu::device::color_detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1375,7 +1375,7 @@ namespace detail
    }; \
    template <> struct name ## _traits<float> \
    { \
-        typedef detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::gpu::device::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
@ -1383,13 +1383,12 @@ namespace detail
    }; \
    template <> struct name ## _full_traits<float> \
    { \
-        typedef detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::gpu::device::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
        static __host__ __device__ __forceinline__ functor_type create_functor() \
        { \
            return functor_type(); \
        } \
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_COLOR_DETAIL_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
@ -47,26 +47,10 @@
 #include "../vec_traits.hpp"
 #include "../functional.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device 
 {
-    //! Mask accessor
-
-    struct MaskReader
+    namespace transform_detail
    {
-        explicit MaskReader(const PtrStepb& mask_): mask(mask_) {}
-
-        __device__ __forceinline__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
-
-        const PtrStepb mask;
-    };
-
-    struct NoMask 
-    {
-        __device__ __forceinline__ bool operator()(int y, int x) const { return true; } 
-    };
-
        //! Read Write Traits

        template <typename T, typename D, int shift> struct UnaryReadWriteTraits
@ -404,8 +388,7 @@ namespace detail
            typedef TransformFunctorTraits<BinOp> ft;
            TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
        }
-}
-
-END_OPENCV_DEVICE_NAMESPACE
+    } // namespace transform_detail
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/detail/type_traits_detail.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/detail/type_traits_detail.hpp
@ -46,9 +46,9 @@
 #include "internal_shared.hpp"
 #include "../vec_traits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device 
+{
+    namespace type_traits_detail
    {
        template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
        template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
@ -181,8 +181,7 @@ namespace detail
            typedef U& type;
            enum { value = 1 };
        };
-}
-
-END_OPENCV_DEVICE_NAMESPACE
+    } // namespace type_traits_detail
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/detail/utility_detail.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/detail/utility_detail.hpp
@ -45,9 +45,9 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device 
+{
+    namespace utility_detail
    {
        ///////////////////////////////////////////////////////////////////////////////
        // Reduction
@ -837,8 +837,7 @@ namespace detail
                }
            }
        };
-}
-
-END_OPENCV_DEVICE_NAMESPACE
+    } // namespace utility_detail
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_UTILITY_DETAIL_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/detail/vec_distance_detail.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/detail/vec_distance_detail.hpp
@ -46,9 +46,9 @@
 #include "internal_shared.hpp"
 #include "../datamov_utils.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device 
+{
+    namespace vec_distance_detail
    {
        template <int THREAD_DIM, int N> struct UnrollVecDiffCached
        {
@ -112,8 +112,7 @@ namespace detail
                UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
            }
        };
-}
-
-END_OPENCV_DEVICE_NAMESPACE
+    } // namespace vec_distance_detail
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/emulation.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/emulation.hpp
@ -46,8 +46,8 @@
 #include "internal_shared.hpp"
 #include "warp_reduce.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    struct Emulation
    {
 	    static __forceinline__ __device__ int Ballot(int predicate, volatile int* cta_buffer)
@ -62,7 +62,6 @@ struct Emulation
    #endif
 	    }
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif /* OPENCV_GPU_EMULATION_HPP_ */
--- a/modules/gpu/src/opencv2/gpu/device/filters.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/filters.hpp
@ -48,8 +48,8 @@
 #include "vec_traits.hpp"
 #include "vec_math.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename Ptr2D> struct PointFilter
    {
        typedef typename Ptr2D::elem_type elem_type;
@ -131,7 +131,6 @@ template <typename Ptr2D> struct CubicFilter

        const Ptr2D src;
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_FILTERS_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/funcattrib.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/funcattrib.hpp
@ -47,8 +47,8 @@
 #include <cstdio>
 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template<class Func> 
    void printFuncAttrib(Func& func)
    {
@ -68,7 +68,6 @@ void printFuncAttrib(Func& func)
        printf("\n");
        fflush(stdout); 
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif  /* __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ */
--- a/modules/gpu/src/opencv2/gpu/device/functional.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/functional.hpp
@ -49,8 +49,8 @@
 #include "vec_traits.hpp"
 #include "type_traits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    // Function Objects

    using thrust::unary_function;
@ -241,6 +241,7 @@ template <typename T> struct maximum : binary_function<T, T, T>
            return lhs < rhs ? rhs : lhs;
        }
    };
+
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, schar, ::max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, char, ::max)
@ -258,6 +259,7 @@ template <typename T> struct minimum : binary_function<T, T, T>
            return lhs < rhs ? lhs : rhs;
        }
    };
+
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, schar, ::min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, char, ::min)
@ -287,6 +289,7 @@ OPENCV_GPU_IMPLEMENT_MINMAX(minimum, double, ::fmin)
            return func(v); \
        } \
    };
+
 #define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \
    template <typename T> struct name ## _func : binary_function<T, T, float> \
    { \
@ -505,7 +508,7 @@ template <typename F> struct IsBinaryFunction
        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
    };

-namespace detail
+    namespace functional_detail
    {
        template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
@ -513,7 +516,7 @@ namespace detail

        template <typename T, typename D> struct DefaultUnaryShift
        {
-        enum { shift = detail::UnOpShift<sizeof(T), sizeof(D)>::shift };
+            enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
        };
        
        template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
@ -522,7 +525,7 @@ namespace detail

        template <typename T1, typename T2, typename D> struct DefaultBinaryShift
        {
-        enum { shift = detail::BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
+            enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
        };

        template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
@ -538,7 +541,7 @@ namespace detail

    template <typename Func> struct DefaultTransformShift
    {
-    enum { shift = detail::ShiftDispatcher<Func>::shift };
+        enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
    };

    template <typename Func> struct DefaultTransformFunctorTraits
@ -553,9 +556,8 @@ template <typename Func> struct DefaultTransformFunctorTraits

    template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};

-#define DEFINE_TRANSFORM_FUNCTOR_TRAITS(type) \
+#define OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(type) \
    template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_FUNCTIONAL_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/limits.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/limits.hpp
@ -43,10 +43,11 @@
 #ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
 #define __OPENCV_GPU_LIMITS_GPU_HPP__

+#include <limits>
 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template<class T> struct numeric_limits
    {
        typedef T type;
@ -92,8 +93,8 @@ template<> struct numeric_limits<char>
    template<> struct numeric_limits<signed char>
    {
        typedef char type;
-    __device__ __forceinline__ static type min() { return CHAR_MIN; };
-    __device__ __forceinline__ static type max() { return CHAR_MAX; };
+        __device__ __forceinline__ static type min() { return SCHAR_MIN; };
+        __device__ __forceinline__ static type max() { return SCHAR_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
@ -229,7 +230,6 @@ template<> struct numeric_limits<double>
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = true;
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_LIMITS_GPU_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp
@ -45,8 +45,8 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device
+{
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
@ -211,7 +211,6 @@ template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
        return saturate_cast<uint>((float)v);
    #endif
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 #endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */
--- a/modules/gpu/src/opencv2/gpu/device/static_check.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/static_check.hpp
@ -49,9 +49,7 @@
    #define __OPENCV_GPU_HOST_DEVICE__
 #endif  

-namespace cv
-{
-    namespace gpu
+namespace cv { namespace gpu 
 { 
    namespace device
    {
@ -63,10 +61,9 @@ namespace cv
        };
    }    

-        using cv::gpu::device::Static;
-    }
-}
+    using ::cv::gpu::device::Static;
+}}

-#undef __PCL_GPU_HOST_DEVICE__
+#undef __OPENCV_GPU_HOST_DEVICE__

 #endif /* __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__ */ 
--- a/modules/gpu/src/opencv2/gpu/device/transform.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/transform.hpp
@ -47,30 +47,31 @@
 #include "utility.hpp"
 #include "detail/transform_detail.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T, typename D, typename UnOp>
    void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, cudaStream_t stream = 0)
    {
-    detail::transform_caller(src, dst, op, WithOutMask(), stream);
+        transform_detail::transform_caller(src, dst, op, WithOutMask(), stream);
    }
+
    template <typename T, typename D, typename UnOp>
    void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStepb& mask, const UnOp& op, cudaStream_t stream = 0)
    {
-    detail::transform_caller(src, dst, op, SingleMask(mask), stream);
+        transform_detail::transform_caller(src, dst, op, SingleMask(mask), stream);
    }

    template <typename T1, typename T2, typename D, typename BinOp>
    void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, cudaStream_t stream = 0)
    {
-    detail::transform_caller(src1, src2, dst, op, WithOutMask(), stream);
+        transform_detail::transform_caller(src1, src2, dst, op, WithOutMask(), stream);
    }
+
    template <typename T1, typename T2, typename D, typename BinOp>
    void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const PtrStepb& mask, const BinOp& op, cudaStream_t stream = 0)
    {
-    detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
+        transform_detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 #endif // __OPENCV_GPU_TRANSFORM_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/type_traits.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/type_traits.hpp
@ -46,37 +46,38 @@
 #include "internal_shared.hpp"
 #include "detail/type_traits_detail.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T> struct IsSimpleParameter
    {
-    enum {value = detail::IsIntegral<T>::value || detail::IsFloat<T>::value || detail::PointerTraits<typename detail::ReferenceTraits<T>::type>::value};
+        enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value || 
+            type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
    };

    template <typename T> struct TypeTraits
    {
-    typedef typename detail::UnConst<T>::type                                       NonConstType;
-    typedef typename detail::UnVolatile<T>::type                                    NonVolatileType;
-    typedef typename detail::UnVolatile<typename detail::UnConst<T>::type>::type    UnqualifiedType;
-    typedef typename detail::PointerTraits<UnqualifiedType>::type                   PointeeType;
-    typedef typename detail::ReferenceTraits<T>::type                               ReferredType;
+        typedef typename type_traits_detail::UnConst<T>::type                                                NonConstType;
+        typedef typename type_traits_detail::UnVolatile<T>::type                                             NonVolatileType;
+        typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
+        typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type                            PointeeType;
+        typedef typename type_traits_detail::ReferenceTraits<T>::type                                        ReferredType;

-    enum { isConst          = detail::UnConst<T>::value };
-    enum { isVolatile       = detail::UnVolatile<T>::value };
+        enum { isConst          = type_traits_detail::UnConst<T>::value };
+        enum { isVolatile       = type_traits_detail::UnVolatile<T>::value };

-    enum { isReference      = detail::ReferenceTraits<UnqualifiedType>::value };
-    enum { isPointer        = detail::PointerTraits<typename detail::ReferenceTraits<UnqualifiedType>::type>::value };        
+        enum { isReference      = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
+        enum { isPointer        = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };        

-    enum { isUnsignedInt = detail::IsUnsignedIntegral<UnqualifiedType>::value };
-    enum { isSignedInt   = detail::IsSignedIntergral<UnqualifiedType>::value };
-    enum { isIntegral    = detail::IsIntegral<UnqualifiedType>::value };
-    enum { isFloat       = detail::IsFloat<UnqualifiedType>::value  };
+        enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
+        enum { isSignedInt   = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
+        enum { isIntegral    = type_traits_detail::IsIntegral<UnqualifiedType>::value };
+        enum { isFloat       = type_traits_detail::IsFloat<UnqualifiedType>::value  };
        enum { isArith       = isIntegral || isFloat };
-    enum { isVec         = detail::IsVec<UnqualifiedType>::value  };
+        enum { isVec         = type_traits_detail::IsVec<UnqualifiedType>::value  };
        
-    typedef typename detail::Select<IsSimpleParameter<UnqualifiedType>::value, T, typename detail::AddParameterType<T>::type>::type ParameterType;
+        typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value, 
+            T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 #endif // __OPENCV_GPU_TYPE_TRAITS_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/utility.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/utility.hpp
@ -48,8 +48,8 @@
 #include "datamov_utils.hpp"
 #include "detail/utility_detail.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    #define OPENCV_GPU_LOG_WARP_SIZE	    (5)
    #define OPENCV_GPU_WARP_SIZE	        (1 << OPENCV_GPU_LOG_WARP_SIZE)
    #define OPENCV_GPU_LOG_MEM_BANKS        ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
@ -139,21 +139,21 @@ struct WithOutMask
    template <int n, typename T, typename Op> __device__ __forceinline__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
    {
        StaticAssert<n >= 8 && n <= 512>::check();
-    detail::ReductionDispatcher<n <= 64>::reduce<n>(data, partial_reduction, tid, op);
+        utility_detail::ReductionDispatcher<n <= 64>::reduce<n>(data, partial_reduction, tid, op);
    }

    template <int n, typename T, typename V, typename Pred> 
    __device__ __forceinline__ void reducePredVal(volatile T* sdata, T& myData, V* sval, V& myVal, int tid, const Pred& pred)
    {
        StaticAssert<n >= 8 && n <= 512>::check();
-    detail::PredValReductionDispatcher<n <= 64>::reduce<n>(myData, myVal, sdata, sval, tid, pred);
+        utility_detail::PredValReductionDispatcher<n <= 64>::reduce<n>(myData, myVal, sdata, sval, tid, pred);
    }

    template <int n, typename T, typename V1, typename V2, typename Pred> 
    __device__ __forceinline__ void reducePredVal2(volatile T* sdata, T& myData, V1* sval1, V1& myVal1, V2* sval2, V2& myVal2, int tid, const Pred& pred)
    {
        StaticAssert<n >= 8 && n <= 512>::check();
-    detail::PredVal2ReductionDispatcher<n <= 64>::reduce<n>(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
+        utility_detail::PredVal2ReductionDispatcher<n <= 64>::reduce<n>(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
    }
    
    ///////////////////////////////////////////////////////////////////////////////
@ -209,7 +209,6 @@ template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3],

        return false;
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_UTILITY_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/vec_distance.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/vec_distance.hpp
@ -48,8 +48,8 @@
 #include "functional.hpp"
 #include "detail/vec_distance_detail.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template <typename T> struct L1Dist
    {
        typedef int value_type;
@ -172,7 +172,7 @@ __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist&
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
    __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
    {        
-    detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
+        vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
        
        dist.reduceAll<THREAD_DIM>(smem, tid);
    }
@ -220,7 +220,6 @@ template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct V

        U vec1Vals[MAX_LEN / THREAD_DIM];
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/vec_math.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/vec_math.hpp
@ -48,9 +48,9 @@
 #include "vec_traits.hpp"
 #include "functional.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
-namespace detail
+namespace cv { namespace gpu { namespace device 
+{
+    namespace vec_math_detail
    {
        template <int cn, typename VecD> struct SatCastHelper;
        template <typename VecD> struct SatCastHelper<1, VecD>
@ -92,41 +92,41 @@ namespace detail
        }
    }

-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}

-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}

-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}

-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return detail::saturate_cast_caller<_Tp>(v);}
-template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
+    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}

 #define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
@ -150,7 +150,7 @@ template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const
        return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
    }

-namespace detail
+    namespace vec_math_detail
    {    
        template <typename T1, typename T2> struct BinOpTraits
        {
@ -201,16 +201,16 @@ namespace detail
        return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
    } \
    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
+    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
    { \
-        func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
+        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
    } \
    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
+    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
    { \
-        func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
+        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
    { \
@ -218,16 +218,16 @@ namespace detail
        return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
    } \
    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
+    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
    { \
-        func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
+        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
    } \
    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
+    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
    { \
-        func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
+        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
    { \
@ -235,16 +235,16 @@ namespace detail
        return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
    } \
    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
+    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
    { \
-        func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
+        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
    } \
    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
+    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
    { \
-        func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
+        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
    { \
@ -252,16 +252,16 @@ namespace detail
        return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
    } \
    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
+    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
    { \
-        func<typename detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
+        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
+        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
    } \
    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
+    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
    { \
-        func<typename detail::BinOpTraits<T, type>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
+        func<typename vec_math_detail::BinOpTraits<T, type>::argument_type> f; \
+        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
    }

 #define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
@ -326,7 +326,6 @@ OPENCV_GPU_IMPLEMENT_VEC_OP(double)
    #undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
    #undef OPENCV_GPU_IMPLEMENT_VEC_OP
    #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
        
 #endif // __OPENCV_GPU_VECMATH_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
@ -45,8 +45,8 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    template<typename T, int N> struct TypeVec;

    struct __align__(8) uchar8
@ -275,7 +275,6 @@ template<> struct VecTraits<char8>
        static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
        static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif // __OPENCV_GPU_VEC_TRAITS_HPP__
--- a/modules/gpu/src/opencv2/gpu/device/warp.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/warp.hpp
@ -45,8 +45,8 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    struct Warp
    {
        enum
@ -109,7 +109,6 @@ struct Warp
                *t = value;
        }
    };
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device

 #endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */
--- a/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp
@ -46,8 +46,8 @@

 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
-		
+namespace cv { namespace gpu { namespace device 
+{		
    template <class T> 
    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
    {
@ -66,7 +66,6 @@ __device__ __forceinline__ T warp_reduce ( volatile T *ptr , const unsigned int

 	    return ptr[tid - lane];
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device {

 #endif /* OPENCV_GPU_WARP_REDUCE_HPP__ */
--- a/modules/gpu/src/split_merge.cpp
+++ b/modules/gpu/src/split_merge.cpp
@ -55,21 +55,20 @@ void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, Stream& /*st

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace split_merge 
    {    
        void merge_caller(const DevMem2Db* src, DevMem2Db& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
        void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
    void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream) 
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ split_merge;
+        using namespace ::cv::gpu::device::split_merge;

        CV_Assert(src);
        CV_Assert(n > 0);
@ -108,7 +107,7 @@ namespace

    void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream) 
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ split_merge;
+        using namespace ::cv::gpu::device::split_merge;

        CV_Assert(dst);

--- a/modules/gpu/src/stereobm.cpp
+++ b/modules/gpu/src/stereobm.cpp
@ -55,16 +55,15 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&,

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace stereobm
    {
        void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int ndisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t & stream);
        void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
        void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream);
    }
-
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 const float defaultAvgTexThreshold = 3;

@ -99,7 +98,7 @@ namespace
 {
    void stereo_bm_gpu_operator( GpuMat& minSSD,  GpuMat& leBuf, GpuMat&  riBuf,  int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, cudaStream_t stream)
    {
-        using namespace OPENCV_DEVICE_NAMESPACE_ stereobm;
+        using namespace ::cv::gpu::device::stereobm;

        CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
        CV_DbgAssert(left.type() == CV_8UC1);
--- a/modules/gpu/src/stereobp.cpp
+++ b/modules/gpu/src/stereobp.cpp
@ -59,8 +59,8 @@ void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, GpuMat&, Stream

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace stereobp
    {
        void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump);
@ -77,10 +77,9 @@ namespace stereobp
        void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, 
            const DevMem2D_<short>& disp, cudaStream_t stream);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ stereobp;
+using namespace ::cv::gpu::device::stereobp;

 namespace
 {
--- a/modules/gpu/src/stereocsbp.cpp
+++ b/modules/gpu/src/stereocsbp.cpp
@ -57,8 +57,8 @@ void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat&, const GpuMat&, Gp

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace stereocsbp
    {
        void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
@ -87,10 +87,9 @@ namespace stereocsbp
        void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
            const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ stereocsbp;
+using namespace ::cv::gpu::device::stereocsbp;

 namespace
 {
--- a/modules/gpu/src/surf.cpp
+++ b/modules/gpu/src/surf.cpp
@ -63,8 +63,8 @@ void cv::gpu::SURF_GPU::releaseMemory() { throw_nogpu(); }

 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
-
+namespace cv { namespace gpu { namespace device 
+{
    namespace surf
    {
        void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
@ -88,10 +88,9 @@ namespace surf
        void compute_descriptors_gpu(const DevMem2Df& descriptors, 
            const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures);
    }
+}}}

-END_OPENCV_DEVICE_NAMESPACE
-
-using namespace OPENCV_DEVICE_NAMESPACE_ surf;
+using namespace ::cv::gpu::device::surf;

 namespace
 {
--- a/modules/gpu/test/test_video.cpp
+++ b/modules/gpu/test/test_video.cpp
@ -225,7 +225,7 @@ TEST_P(InterpolateFrames, Regression)

 #ifndef DUMP

-    EXPECT_MAT_NEAR(newFrame_gold, newFrame, 1e-4);
+    EXPECT_MAT_NEAR(newFrame_gold, newFrame, 1e-3);

 #else