From af59a75ffc58e04c64cf9941f54f21ef030e2ee0 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <no@email>
Date: Tue, 10 Jan 2012 11:11:58 +0000
Subject: [PATCH] fixed bug with submatrix in some gpu functions; update gpu
 tests

---
 modules/core/src/cuda/matrix_operations.cu    |   59 +-
 modules/core/src/gpumat.cpp                   |   12 +-
 modules/gpu/src/cuda/calib3d.cu               |    4 +-
 modules/gpu/src/cuda/color.cu                 |    2 +-
 modules/gpu/src/cuda/element_operations.cu    |   60 +-
 modules/gpu/src/cuda/matrix_reductions.cu     |    1 -
 modules/gpu/src/cuda/remap.cu                 |  102 +-
 modules/gpu/src/cuda/resize.cu                |   84 +-
 modules/gpu/src/imgproc.cpp                   |   59 +-
 .../gpu/device/detail/transform_detail.hpp    |   14 -
 .../gpu/src/opencv2/gpu/device/transform.hpp  |   26 +-
 .../gpu/src/opencv2/gpu/device/utility.hpp    |   15 +-
 modules/gpu/test/test_arithm.cpp              | 1066 ++++-------
 modules/gpu/test/test_calib3d.cpp             |   41 +-
 modules/gpu/test/test_features2d.cpp          |  203 +-
 modules/gpu/test/test_filters.cpp             |  359 ++--
 modules/gpu/test/test_gpu_base.cpp            |  101 +-
 modules/gpu/test/test_gpu_base.hpp            |   92 +-
 modules/gpu/test/test_hog.cpp                 |   19 +-
 modules/gpu/test/test_imgproc.cpp             | 1635 ++++++-----------
 modules/gpu/test/test_main.cpp                |   24 +-
 modules/gpu/test/test_matop.cpp               |  221 +--
 modules/gpu/test/test_nvidia.cpp              |   42 +-
 modules/gpu/test/test_precomp.hpp             |    1 +
 modules/gpu/test/test_video.cpp               |   21 +-
 25 files changed, 1777 insertions(+), 2486 deletions(-)
diff --git a/modules/core/src/cuda/matrix_operations.cu b/modules/core/src/cuda/matrix_operations.cu
index 46dc85929..38c7b2826 100644
--- a/modules/core/src/cuda/matrix_operations.cu
+++ b/modules/core/src/cuda/matrix_operations.cu
@@ -59,56 +59,33 @@ namespace cv { namespace gpu { namespace device
     ////////////////////////////////// CopyTo /////////////////////////////////
     ///////////////////////////////////////////////////////////////////////////
 
-    template<typename T>
-    __global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
-    {
-        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
-        size_t y = blockIdx.y * blockDim.y + threadIdx.y;
-
-        if ((x < cols * channels ) && (y < rows))
-            if (mask[y * step_mask + x / channels] != 0)
-            {
-                size_t idx = y * ( step_mat >> shift_and_sizeof<T>::shift ) + x;
-                mat_dst[idx] = mat_src[idx];
-            }
+    // Masked copy for element type T: hands src/dst (viewed as DevMem2D_<T>) to device::transform with identity<T>() and SingleMaskChannels(mask, channels) as the mask argument.
+    template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream)
+    {
+        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, channels), stream);
     }
 
-    template<typename T>
-    void copy_to_with_mask_run(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream)
+    // Selects the copyToWithMask<T> instantiation by depth (index into tab); reports an error through cv::gpu::error when depth has no table entry.
+    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream)
     {
-        dim3 threadsPerBlock(16,16, 1);
-        dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
+        typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream);
 
-        copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
-                ((T*)mat_src.data, (T*)mat_dst.data, (unsigned char*)mask.data, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall ( cudaDeviceSynchronize() );
-    }
-
-    void copy_to_with_mask(DevMem2Db mat_src, DevMem2Db mat_dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream)
-    {
-        typedef void (*CopyToFunc)(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream);
-
-        static CopyToFunc tab[8] =
+        static const func_t tab[] =
         {
-            copy_to_with_mask_run<unsigned char>,
-            copy_to_with_mask_run<signed char>,
-            copy_to_with_mask_run<unsigned short>,
-            copy_to_with_mask_run<short>,
-            copy_to_with_mask_run<int>,
-            copy_to_with_mask_run<float>,
-            copy_to_with_mask_run<double>,
-            0
+            copyToWithMask<unsigned char>,
+            copyToWithMask<signed char>,
+            copyToWithMask<unsigned short>,
+            copyToWithMask<short>,
+            copyToWithMask<int>,
+            copyToWithMask<float>,
+            copyToWithMask<double>
         };
 
-        CopyToFunc func = tab[depth];
-
-        if (func == 0) 
-            cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__, "copy_to_with_mask");
-
-        func(mat_src, mat_dst, mask, channels, stream);
+        // tab has no sentinel entry any more, so reject any depth outside it instead of calling through an out-of-bounds pointer
+        if (depth < 0 || depth >= static_cast<int>(sizeof(tab) / sizeof(tab[0])))
+            cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__, "copyToWithMask_gpu");
+
+        tab[depth](src, dst, mask, channels, stream);
     }
 
     ///////////////////////////////////////////////////////////////////////////
@@ -303,7 +280,7 @@ namespace cv { namespace gpu { namespace device
         cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
         cudaSafeCall( cudaSetDoubleForDevice(&beta) );
         Convertor<T, D> op(alpha, beta);
-        ::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
     }
 
     void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream)
diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp
index c5910761f..756daa9d8 100644
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@@ -348,7 +348,7 @@ namespace
 
 namespace cv { namespace gpu { namespace device
 {
-    void copy_to_with_mask(DevMem2Db src, DevMem2Db dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream);
+    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream);
 
     template <typename T>
     void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream);
@@ -391,13 +391,13 @@ namespace
     template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream)
     {
         Scalar_<T> sf = s;
-        ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream);
+        cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream);
     }
 
     template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
     {
         Scalar_<T> sf = s;
-        ::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
+        cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
     }
 }
 
@@ -405,17 +405,17 @@ namespace cv { namespace gpu
 {
     CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
     {
-        ::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels(), stream);
+        cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.depth(), src.channels(), mask, stream);
     }
 
     CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)
     {
-        ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
+        cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
     }
 
     CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
     {
-        ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
+        cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
     }
 
     CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
diff --git a/modules/gpu/src/cuda/calib3d.cu b/modules/gpu/src/cuda/calib3d.cu
index 27c2afb34..e296aeb56 100644
--- a/modules/gpu/src/cuda/calib3d.cu
+++ b/modules/gpu/src/cuda/calib3d.cu
@@ -74,7 +74,7 @@ namespace cv { namespace gpu { namespace device
             cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
-            ::cv::gpu::device::transform(src, dst, TransformOp(), stream);
+            cv::gpu::device::transform(src, dst, TransformOp(), WithOutMask(), stream);
         }
     } // namespace transform_points
 
@@ -113,7 +113,7 @@ namespace cv { namespace gpu { namespace device
             cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
-            ::cv::gpu::device::transform(src, dst, ProjectOp(), stream);
+            cv::gpu::device::transform(src, dst, ProjectOp(), WithOutMask(), stream);
         }
     } // namespace project_points
 
diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu
index 9384ea668..1dc03c46a 100644
--- a/modules/gpu/src/cuda/color.cu
+++ b/modules/gpu/src/cuda/color.cu
@@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace device
         traits::functor_type functor = traits::create_functor(); \
         typedef typename traits::functor_type::argument_type src_t; \
         typedef typename traits::functor_type::result_type   dst_t; \
-        ::cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream); \
+        cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, WithOutMask(), stream); \
     }
 
 #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu
index b4d72f0ce..8d1995514 100644
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -84,9 +84,9 @@ namespace cv { namespace gpu { namespace device
     template <typename T, typename D> void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
     {
         if (mask.data)
-            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Add<T, D>(), stream);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), SingleMask(mask), stream);
         else
-            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), stream);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), WithOutMask(), stream);
     }
 
     template void add_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
@@ -181,9 +181,9 @@ namespace cv { namespace gpu { namespace device
         cudaSafeCall( cudaSetDoubleForDevice(&val) );
         AddScalar<T, D> op(val);
         if (mask.data)
-            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, SingleMask(mask), stream);
         else
-            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
     }
 
     template void add_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
@@ -277,9 +277,9 @@ namespace cv { namespace gpu { namespace device
     template <typename T, typename D> void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
     {
         if (mask.data)
-            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Subtract<T, D>(), stream);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), SingleMask(mask), stream);
         else
-            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), stream);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), WithOutMask(), stream);
     }
 
     template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
@@ -374,9 +374,9 @@ namespace cv { namespace gpu { namespace device
         cudaSafeCall( cudaSetDoubleForDevice(&val) );
         SubtractScalar<T, D> op(val);
         if (mask.data)
-            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, SingleMask(mask), stream);
         else
-            ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
     }
 
     template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
@@ -462,7 +462,7 @@ namespace cv { namespace gpu { namespace device
 
     void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream)
     {
-        ::cv::gpu::device::transform(static_cast< DevMem2D_<uint> >(src1), src2, static_cast< DevMem2D_<uint> >(dst), multiply_8uc4_32f(), stream);
+        cv::gpu::device::transform(static_cast< DevMem2D_<uint> >(src1), src2, static_cast< DevMem2D_<uint> >(dst), multiply_8uc4_32f(), WithOutMask(), stream);
     }
 
     struct multiply_16sc4_32f : binary_function<short4, float, short4>
@@ -483,7 +483,7 @@ namespace cv { namespace gpu { namespace device
 
     void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream)
     {
-        ::cv::gpu::device::transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), multiply_16sc4_32f(), stream);
+        cv::gpu::device::transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), multiply_16sc4_32f(), WithOutMask(), stream);
     }
 
     template <typename T, typename D> struct Multiply : binary_function<T, T, D>
@@ -521,7 +521,7 @@ namespace cv { namespace gpu { namespace device
     {
         cudaSafeCall( cudaSetDoubleForDevice(&scale) );
         Multiply<T, D> op(scale);
-        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
     }
 
     template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
@@ -617,7 +617,7 @@ namespace cv { namespace gpu { namespace device
         cudaSafeCall( cudaSetDoubleForDevice(&val) );
         cudaSafeCall( cudaSetDoubleForDevice(&scale) );
         MultiplyScalar<T, D> op(val, scale);
-        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
     }
 
     template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
@@ -698,7 +698,7 @@ namespace cv { namespace gpu { namespace device
 
     void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream)
     {
-        transform(static_cast< DevMem2D_<uchar4> >(src1), src2, static_cast< DevMem2D_<uchar4> >(dst), divide_8uc4_32f(), stream);
+        cv::gpu::device::transform(static_cast< DevMem2D_<uchar4> >(src1), src2, static_cast< DevMem2D_<uchar4> >(dst), divide_8uc4_32f(), WithOutMask(), stream);
     }
 
 
@@ -721,7 +721,7 @@ namespace cv { namespace gpu { namespace device
 
     void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream)
     {
-        transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), divide_16sc4_32f(), stream);
+        cv::gpu::device::transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), divide_16sc4_32f(), WithOutMask(), stream);
     }
 
     template <typename T, typename D> struct Divide : binary_function<T, T, D>
@@ -759,7 +759,7 @@ namespace cv { namespace gpu { namespace device
     {
         cudaSafeCall( cudaSetDoubleForDevice(&scale) );
         Divide<T, D> op(scale);
-        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
     }
 
     template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
@@ -855,7 +855,7 @@ namespace cv { namespace gpu { namespace device
         cudaSafeCall( cudaSetDoubleForDevice(&val) );
         cudaSafeCall( cudaSetDoubleForDevice(&scale) );
         DivideScalar<T, D> op(val, scale);
-        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
     }
 
     template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
@@ -949,7 +949,7 @@ namespace cv { namespace gpu { namespace device
     {
         cudaSafeCall( cudaSetDoubleForDevice(&scalar) );
         Reciprocal<T, D> op(scalar);
-        ::cv::gpu::device::transform((DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
     }
 
     template void divide_gpu<uchar, uchar >(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@@ -1055,7 +1055,7 @@ namespace cv { namespace gpu { namespace device
 
     template <typename T> void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
     {
-        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), WithOutMask(), stream);
     }
 
     template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@@ -1101,7 +1101,7 @@ namespace cv { namespace gpu { namespace device
     {
         cudaSafeCall( cudaSetDoubleForDevice(&val) );
         AbsdiffScalar<T> op(val);
-        ::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, WithOutMask(), stream);
     }
 
     template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
@@ -1188,7 +1188,7 @@ namespace cv { namespace gpu { namespace device
     template <template <typename> class Op, typename T> void compare(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
     {
         Op<T> op;
-        ::cv::gpu::device::transform(static_cast< DevMem2D_<T> >(src1), static_cast< DevMem2D_<T> >(src2), dst, op, stream);
+        cv::gpu::device::transform(static_cast< DevMem2D_<T> >(src1), static_cast< DevMem2D_<T> >(src2), dst, op, WithOutMask(), stream);
     }
 
     template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
@@ -1546,7 +1546,7 @@ namespace cv { namespace gpu { namespace device
     template <typename T>
     void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        ::cv::gpu::device::transform(src1, src2, dst, minimum<T>(), stream);    
+        cv::gpu::device::transform(src1, src2, dst, minimum<T>(), WithOutMask(), stream);    
     }
 
     template void min_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@@ -1560,7 +1560,7 @@ namespace cv { namespace gpu { namespace device
     template <typename T>
     void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        ::cv::gpu::device::transform(src1, src2, dst, maximum<T>(), stream);    
+        cv::gpu::device::transform(src1, src2, dst, maximum<T>(), WithOutMask(), stream);    
     }
 
     template void max_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
@@ -1574,7 +1574,7 @@ namespace cv { namespace gpu { namespace device
     template <typename T>
     void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        ::cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), stream);    
+        cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), WithOutMask(), stream);    
     }
 
     template void min_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
@@ -1588,7 +1588,7 @@ namespace cv { namespace gpu { namespace device
     template <typename T>
     void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        ::cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), stream);    
+        cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), WithOutMask(), stream);    
     }
 
     template void max_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
@@ -1635,19 +1635,17 @@ namespace cv { namespace gpu { namespace device
     };
 
     template <template <typename> class Op, typename T>
-    void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal, 
-        cudaStream_t stream)
+    void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal, cudaStream_t stream)
     {
         Op<T> op(thresh, maxVal);
-        ::cv::gpu::device::transform(src, dst, op, stream);
+        cv::gpu::device::transform(src, dst, op, WithOutMask(), stream);
     }
 
     template <typename T>
     void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type,
         cudaStream_t stream)
     {
-        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal, 
-            cudaStream_t stream);
+        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal, cudaStream_t stream);
 
         static const caller_t callers[] = 
         {
@@ -1737,7 +1735,7 @@ namespace cv { namespace gpu { namespace device
     template<typename T>
     void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream)
     {
-        ::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), WithOutMask(), stream);
     }   
 
     template void pow_caller<uchar>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
@@ -1829,7 +1827,7 @@ namespace cv { namespace gpu { namespace device
 
         AddWeighted<T1, T2, D> op(alpha, beta, gamma);
 
-        ::cv::gpu::device::transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), static_cast< DevMem2D_<D> >(dst), op, stream);
+        cv::gpu::device::transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), static_cast< DevMem2D_<D> >(dst), op, WithOutMask(), stream);
     }
 
     template void addWeighted_gpu<uchar, uchar, uchar>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu
index da697a7bf..6d3d7c5e6 100644
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@@ -44,7 +44,6 @@
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/vec_math.hpp"
-#include "opencv2/gpu/device/transform.hpp"
 
 namespace cv { namespace gpu { namespace device 
 {
diff --git a/modules/gpu/src/cuda/remap.cu b/modules/gpu/src/cuda/remap.cu
index e2cdc3b4b..f77adee92 100644
--- a/modules/gpu/src/cuda/remap.cu
+++ b/modules/gpu/src/cuda/remap.cu
@@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace device
 
         template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
         {
-            static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, 
+            static void call(DevMem2D_<T> src, DevMem2Df mapx, DevMem2Df mapy, DevMem2D_<T> dst, 
                 const float* borderValue, cudaStream_t stream, int)
             {
                 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; 
@@ -86,7 +86,8 @@ namespace cv { namespace gpu { namespace device
 
         template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
         {
-            static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, int)
+            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, 
+                DevMem2D_<T> dst, const float* borderValue, int)
             {
                 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; 
                 
@@ -110,20 +111,23 @@ namespace cv { namespace gpu { namespace device
             { \
                 typedef type elem_type; \
                 typedef int index_type; \
+                int xoff, yoff; \
+                tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
                 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
                 { \
-                    return tex2D(tex_remap_ ## type , x, y); \
+                    return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
                 } \
             }; \
             template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
             { \
-                static void call(const DevMem2D_< type >& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_< type >& dst, const float* borderValue, int cc) \
+                static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, \
+                    DevMem2D_< type > dst, const float* borderValue, int cc) \
                 { \
                     typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
                     dim3 block(32, cc >= 20 ? 8 : 4); \
                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
-                    bindTexture(&tex_remap_ ## type , src); \
-                    tex_remap_ ## type ##_reader texSrc; \
+                    bindTexture(&tex_remap_ ## type , srcWhole); \
+                    tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
                     B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
                     BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
                     Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
@@ -134,14 +138,25 @@ namespace cv { namespace gpu { namespace device
             }; \
             template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
             { \
-                static void call(const DevMem2D_< type >& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_< type >& dst, const float*, int) \
+                static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, \
+                    DevMem2D_< type > dst, const float*, int) \
                 { \
                     dim3 block(32, 8); \
                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
-                    bindTexture(&tex_remap_ ## type , src); \
-                    tex_remap_ ## type ##_reader texSrc; \
-                    Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
-                    remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
+                    bindTexture(&tex_remap_ ## type , srcWhole); \
+                    tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
+                    if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
+                    { \
+                        Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
+                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
+                    } \
+                    else \
+                    { \
+                        BrdReplicate<type> brd(src.rows, src.cols); \
+                        BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
+                        Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
+                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
+                    } \
                     cudaSafeCall( cudaGetLastError() ); \
                     cudaSafeCall( cudaDeviceSynchronize() ); \
                 } \
@@ -175,21 +190,21 @@ namespace cv { namespace gpu { namespace device
 
         template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
         { 
-            static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, 
-                const float* borderValue, cudaStream_t stream, int cc)
+            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, 
+                DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc) // srcWhole, xoff and yoff are forwarded only when stream == 0
             {
                 if (stream == 0)
-                    RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, cc);
+                    RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc); // stream == 0: non-stream dispatcher also gets srcWhole and the offset
                 else
                     RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc);
             }
         };
 
-        template <typename T> void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, 
-            int borderMode, const float* borderValue, cudaStream_t stream, int cc)
+        template <typename T> void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, 
+            DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
         {
-            typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst, 
-                const float* borderValue, cudaStream_t stream, int cc);
+            typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, 
+                DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc);
 
             static const caller_t callers[3][5] = 
             {
@@ -216,37 +231,38 @@ namespace cv { namespace gpu { namespace device
                 }
             };
 
-            callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
+            callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, xmap, ymap, 
+                static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
         }
 
-        template void remap_gpu<uchar >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<uchar2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        template void remap_gpu<uchar3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        template void remap_gpu<uchar4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 
-        //template void remap_gpu<schar>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<char2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<char3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<char4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 
-        template void remap_gpu<ushort >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<ushort2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        template void remap_gpu<ushort3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        template void remap_gpu<ushort4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 
-        template void remap_gpu<short >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<short2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        template void remap_gpu<short3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        template void remap_gpu<short4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 
-        //template void remap_gpu<int >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<int2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<int3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<int4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
 
-        template void remap_gpu<float >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        //template void remap_gpu<float2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        template void remap_gpu<float3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
-        template void remap_gpu<float4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        //template void remap_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+        template void remap_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
     } // namespace imgproc
 }}} // namespace cv { namespace gpu { namespace device
diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu
index 34c8f66f4..af4059328 100644
--- a/modules/gpu/src/cuda/resize.cu
+++ b/modules/gpu/src/cuda/resize.cu
@@ -80,7 +80,7 @@ namespace cv { namespace gpu { namespace device
 
         template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
         {
-            static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
+            static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
             {            
                 dim3 block(32, 8);
                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -95,7 +95,7 @@ namespace cv { namespace gpu { namespace device
         };
         template <typename T> struct ResizeDispatcherStream<PointFilter, T>
         {
-            static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
+            static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
             {            
                 dim3 block(32, 8);
                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -110,7 +110,7 @@ namespace cv { namespace gpu { namespace device
 
         template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
         {
-            static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst)
+            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
             {            
                 dim3 block(32, 8);
                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -127,7 +127,7 @@ namespace cv { namespace gpu { namespace device
         };
         template <typename T> struct ResizeDispatcherNonStream<PointFilter, T>
         {
-            static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst)
+            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
             {            
                 dim3 block(32, 8);
                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -148,19 +148,21 @@ namespace cv { namespace gpu { namespace device
             { \
                 typedef type elem_type; \
                 typedef int index_type; \
+                int xoff, yoff; /* offset added to the coordinates of every texture fetch */ \
+                tex_resize_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} /* stores the offset supplied by the dispatcher */ \
                 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
                 { \
-                    return tex2D(tex_resize_ ## type , x, y); \
+                    return tex2D(tex_resize_ ## type , x + xoff, y + yoff); /* fetch at (x, y) shifted by (xoff, yoff) */ \
                 } \
             }; \
             template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type> \
             { \
-                static void call(const DevMem2D_< type >& src, float fx, float fy, const DevMem2D_< type >& dst) \
+                static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \
                 { \
                     dim3 block(32, 8); \
                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
-                    bindTexture(&tex_resize_ ## type , src); \
-                    tex_resize_ ## type ##_reader texSrc; \
+                    bindTexture(&tex_resize_ ## type , srcWhole); \
+                    tex_resize_ ## type ##_reader texSrc(xoff, yoff); \
                     Filter< tex_resize_ ## type ##_reader > filter_src(texSrc); \
                     resize<<<grid, block>>>(filter_src, fx, fy, dst); \
                     cudaSafeCall( cudaGetLastError() ); \
@@ -169,12 +171,12 @@ namespace cv { namespace gpu { namespace device
             }; \
             template <> struct ResizeDispatcherNonStream<PointFilter, type> \
             { \
-                static void call(const DevMem2D_< type >& src, float fx, float fy, const DevMem2D_< type >& dst) \
+                static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \
                 { \
                     dim3 block(32, 8); \
                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
-                    bindTexture(&tex_resize_ ## type , src); \
-                    tex_resize_ ## type ##_reader texSrc; \
+                    bindTexture(&tex_resize_ ## type , srcWhole); \
+                    tex_resize_ ## type ##_reader texSrc(xoff, yoff); \
                     resizeNN<<<grid, block>>>(texSrc, fx, fy, dst); \
                     cudaSafeCall( cudaGetLastError() ); \
                     cudaSafeCall( cudaDeviceSynchronize() ); \
@@ -209,55 +211,57 @@ namespace cv { namespace gpu { namespace device
 
         template <template <typename> class Filter, typename T> struct ResizeDispatcher
         { 
-            static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
+            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream) // srcWhole, xoff and yoff are forwarded only when stream == 0
             {
                 if (stream == 0)
-                    ResizeDispatcherNonStream<Filter, T>::call(src, fx, fy, dst);
+                    ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst); // stream == 0: non-stream dispatcher also gets srcWhole and the offset
                 else
                     ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
             }
         };
 
-        template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream)
+        template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, 
+            DevMem2Db dst, int interpolation, cudaStream_t stream) // interpolation indexes the 3-entry callers table below; it is not range-checked here
         {
-            typedef void (*caller_t)(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream);
+            typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream); // signature of every ResizeDispatcher<...>::call entry in the table
 
             static const caller_t callers[3] = 
             {
                 ResizeDispatcher<PointFilter, T>::call, ResizeDispatcher<LinearFilter, T>::call, ResizeDispatcher<CubicFilter, T>::call
             };
 
-            callers[interpolation](static_cast< DevMem2D_<T> >(src), fx, fy, static_cast< DevMem2D_<T> >(dst), stream);
+            callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy, 
+                static_cast< DevMem2D_<T> >(dst), stream); // src, srcWhole and dst are cast to DevMem2D_<T> before dispatch
         }
 
-        template void resize_gpu<uchar >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<uchar2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<uchar3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<uchar4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
 
-        //template void resize_gpu<schar>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<char2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<char3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<char4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
 
-        template void resize_gpu<ushort >(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<ushort2>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<ushort3>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<ushort4>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
 
-        template void resize_gpu<short >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<short2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<short3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<short4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
 
-        //template void resize_gpu<int >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<int2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<int3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<int4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
 
-        template void resize_gpu<float >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        //template void resize_gpu<float2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<float3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
-        template void resize_gpu<float4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        //template void resize_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+        template void resize_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
     } // namespace imgproc
 }}} // namespace cv { namespace gpu { namespace device
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 3be2e052b..e4410eced 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -114,7 +114,7 @@ namespace cv { namespace gpu { namespace device
     namespace imgproc 
     {
         template <typename T> 
-        void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, 
+        void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, 
                        int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
     }
 }}}
@@ -123,8 +123,9 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
 {
     using namespace ::cv::gpu::device::imgproc;
 
-    typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, 
+    typedef void (*caller_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, 
         int borderMode, const float* borderValue, cudaStream_t stream, int cc);
+
     static const caller_t callers[6][4] = 
     {
         {remap_gpu<uchar>, 0/*remap_gpu<uchar2>*/, remap_gpu<uchar3>, remap_gpu<uchar4>},
@@ -154,8 +155,13 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
 
     DeviceInfo info;
     int cc = info.majorVersion() * 10 + info.minorVersion();
+    
+    Size wholeSize;
+    Point ofs;
+    src.locateROI(wholeSize, ofs);
 
-    func(src, xmap, ymap, dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), cc);
+    func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap, 
+        dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), cc);
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -310,7 +316,8 @@ namespace cv { namespace gpu { namespace device
 {
     namespace imgproc 
     {
-        template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
+        template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, 
+            DevMem2Db dst, int interpolation, cudaStream_t stream);
     }
 }}}
 
@@ -342,18 +349,25 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
     }
 
     cudaStream_t stream = StreamAccessor::getStream(s);
+    
+    Size wholeSize;
+    Point ofs;
+    src.locateROI(wholeSize, ofs);
 
     if ((src.type() == CV_8UC1 || src.type() == CV_8UC4) && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR))
     {
         static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
 
         NppiSize srcsz;
-        srcsz.width  = src.cols;
-        srcsz.height = src.rows;
+        srcsz.width  = wholeSize.width;
+        srcsz.height = wholeSize.height;
+
         NppiRect srcrect;
-        srcrect.x = srcrect.y = 0;
+        srcrect.x = ofs.x;
+        srcrect.y = ofs.y;
         srcrect.width  = src.cols;
         srcrect.height = src.rows;
+
         NppiSize dstsz;
         dstsz.width  = dst.cols;
         dstsz.height = dst.rows;
@@ -362,12 +376,12 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
 
         if (src.type() == CV_8UC1)
         {
-            nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,
+            nppSafeCall( nppiResize_8u_C1R(src.datastart, srcsz, static_cast<int>(src.step), srcrect,
                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
         }
         else
         {
-            nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,
+            nppSafeCall( nppiResize_8u_C4R(src.datastart, srcsz, static_cast<int>(src.step), srcrect,
                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
         }
 
@@ -378,7 +392,8 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
     {
         using namespace ::cv::gpu::device::imgproc;
 
-        typedef void (*caller_t)(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
+        typedef void (*caller_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
+
         static const caller_t callers[6][4] = 
         {
             {resize_gpu<uchar>, 0/*resize_gpu<uchar2>*/, resize_gpu<uchar3>, resize_gpu<uchar4>},
@@ -389,7 +404,8 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
             {resize_gpu<float>, 0/*resize_gpu<float2>*/, resize_gpu<float3>, resize_gpu<float4>}
         };
 
-        callers[src.depth()][src.channels() - 1](src, static_cast<float>(fx), static_cast<float>(fy), dst, interpolation, stream);
+        callers[src.depth()][src.channels() - 1](src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, 
+            static_cast<float>(fx), static_cast<float>(fy), dst, interpolation, stream);
     }
 }
 
@@ -526,14 +542,21 @@ namespace
         CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
 
         dst.create(dsize, src.type());
+    
+        Size wholeSize;
+        Point ofs;
+        src.locateROI(wholeSize, ofs);
 
         NppiSize srcsz;
-        srcsz.height = src.rows;
-        srcsz.width = src.cols;
+        srcsz.height = wholeSize.height;
+        srcsz.width = wholeSize.width;
+
         NppiRect srcroi;
-        srcroi.x = srcroi.y = 0;
+        srcroi.x = ofs.x;
+        srcroi.y = ofs.y;
         srcroi.height = src.rows;
         srcroi.width = src.cols;
+
         NppiRect dstroi;
         dstroi.x = dstroi.y = 0;
         dstroi.height = dst.rows;
@@ -546,19 +569,19 @@ namespace
         switch (src.depth())
         {
         case CV_8U:
-            nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,
+            nppSafeCall( npp_warp_8u[src.channels()][warpInd]((Npp8u*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
             break;
         case CV_16U:
-            nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr<Npp16u>(), srcsz, static_cast<int>(src.step), srcroi,
+            nppSafeCall( npp_warp_16u[src.channels()][warpInd]((Npp16u*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
                 dst.ptr<Npp16u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
             break;
         case CV_32S:
-            nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr<Npp32s>(), srcsz, static_cast<int>(src.step), srcroi,
+            nppSafeCall( npp_warp_32s[src.channels()][warpInd]((Npp32s*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
             break;
         case CV_32F:
-            nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr<Npp32f>(), srcsz, static_cast<int>(src.step), srcroi,
+            nppSafeCall( npp_warp_32f[src.channels()][warpInd]((Npp32f*)src.datastart, srcsz, static_cast<int>(src.step), srcroi,
                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );
             break;
         default:
diff --git a/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp b/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
index e053cb6bf..28ced8c5d 100644
--- a/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
@@ -386,20 +386,6 @@ namespace cv { namespace gpu { namespace device
                     cudaSafeCall( cudaDeviceSynchronize() );            
             }
         };        
-
-        template <typename T, typename D, typename UnOp, typename Mask>
-        static inline void transform_caller(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, Mask mask, cudaStream_t stream)
-        {
-            typedef TransformFunctorTraits<UnOp> ft;
-            TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
-        }
-
-        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
-        static inline void transform_caller(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, Mask mask, cudaStream_t stream)
-        {
-            typedef TransformFunctorTraits<BinOp> ft;
-            TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
-        }
     } // namespace transform_detail
 }}} // namespace cv { namespace gpu { namespace device
 
diff --git a/modules/gpu/src/opencv2/gpu/device/transform.hpp b/modules/gpu/src/opencv2/gpu/device/transform.hpp
index b7e130315..89eed7e67 100644
--- a/modules/gpu/src/opencv2/gpu/device/transform.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/transform.hpp
@@ -49,28 +49,18 @@
 
 namespace cv { namespace gpu { namespace device 
 {
-    template <typename T, typename D, typename UnOp>
-    static inline void transform(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, cudaStream_t stream = 0)
+    template <typename T, typename D, typename UnOp, typename Mask>
+    static inline void transform(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, Mask mask, cudaStream_t stream)
     {
-        transform_detail::transform_caller(src, dst, op, WithOutMask(), stream);
+        typedef TransformFunctorTraits<UnOp> ft;
+        transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
     }
 
-    template <typename T, typename D, typename UnOp>
-    static inline void transform(DevMem2D_<T> src, DevMem2D_<D> dst, PtrStepb mask, UnOp op, cudaStream_t stream = 0)
+    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+    static inline void transform(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, Mask mask, cudaStream_t stream)
     {
-        transform_detail::transform_caller(src, dst, op, SingleMask(mask), stream);
-    }
-
-    template <typename T1, typename T2, typename D, typename BinOp>
-    static inline void transform(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, cudaStream_t stream = 0)
-    {
-        transform_detail::transform_caller(src1, src2, dst, op, WithOutMask(), stream);
-    }
-
-    template <typename T1, typename T2, typename D, typename BinOp>
-    static inline void transform(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, PtrStepb mask, BinOp op, cudaStream_t stream = 0)
-    {
-        transform_detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
+        typedef TransformFunctorTraits<BinOp> ft;
+        transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
     }
 }}}
 
diff --git a/modules/gpu/src/opencv2/gpu/device/utility.hpp b/modules/gpu/src/opencv2/gpu/device/utility.hpp
index f4952b958..cb702b739 100644
--- a/modules/gpu/src/opencv2/gpu/device/utility.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/utility.hpp
@@ -69,7 +69,7 @@ namespace cv { namespace gpu { namespace device
 
     struct SingleMask
     {
-        explicit __host__ __device__ __forceinline__ SingleMask(const PtrStepb& mask_) : mask(mask_) {}
+        explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
         
         __device__ __forceinline__ bool operator()(int y, int x) const
         {            
@@ -79,6 +79,19 @@ namespace cv { namespace gpu { namespace device
         PtrStepb mask;
     };
 
+    struct SingleMaskChannels
+    {
+        __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_) : mask(mask_), channels(channels_) {}
+        
+        __device__ __forceinline__ bool operator()(int y, int x) const
+        {            
+            return mask.ptr(y)[x / channels] != 0;
+        }
+
+        PtrStepb mask;
+        int channels;
+    };
+
     struct MaskCollection
     {
         explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_) : maskCollection(maskCollection_) {}
diff --git a/modules/gpu/test/test_arithm.cpp b/modules/gpu/test/test_arithm.cpp
index f3a2039e0..51b6955f1 100644
--- a/modules/gpu/test/test_arithm.cpp
+++ b/modules/gpu/test/test_arithm.cpp
@@ -43,41 +43,46 @@
 
 #ifdef HAVE_CUDA
 
-struct ArithmTest : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+using namespace cvtest;
+using namespace testing;
+
+PARAM_TEST_CASE(ArithmTestBase, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
-    cv::Mat mat1, mat2;
+    cv::Mat mat1; 
+    cv::Mat mat2;
+    cv::Scalar val;
         
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
         
-        mat1 = cvtest::randomMat(rng, size, type, 1, 16, false);
-        mat2 = cvtest::randomMat(rng, size, type, 1, 16, false);
+        mat1 = randomMat(rng, size, type, 1, 16, false);
+        mat2 = randomMat(rng, size, type, 1, 16, false);
+
+        val = cv::Scalar(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
     }
 };
 
 ////////////////////////////////////////////////////////////////////////////////
 // add
 
-struct AddArray : ArithmTest {};
+struct Add : ArithmTestBase {};
 
-TEST_P(AddArray, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    
+TEST_P(Add, Array) 
+{    
     cv::Mat dst_gold;
     cv::add(mat1, mat2, dst_gold);
 
@@ -86,7 +91,7 @@ TEST_P(AddArray, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::add(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
+        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpuRes);
 
         gpuRes.download(dst);
     );
@@ -94,24 +99,8 @@ TEST_P(AddArray, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, AddArray, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32SC1, CV_32FC1)));
-
-struct AddScalar : ArithmTest {};
-
-TEST_P(AddScalar, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
-
-    PRINT_PARAM(val);
-    
+TEST_P(Add, Scalar) 
+{    
     cv::Mat dst_gold;
     cv::add(mat1, val, dst_gold);
 
@@ -120,7 +109,7 @@ TEST_P(AddScalar, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::add(cv::gpu::GpuMat(mat1), val, gpuRes);
+        cv::gpu::add(loadMat(mat1, useRoi), val, gpuRes);
 
         gpuRes.download(dst);
     );
@@ -128,21 +117,18 @@ TEST_P(AddScalar, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, AddScalar, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1, CV_32FC2)));
+INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // subtract
 
-struct SubtractArray : ArithmTest {};
+struct Subtract : ArithmTestBase {};
 
-TEST_P(SubtractArray, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    
+TEST_P(Subtract, Array) 
+{    
     cv::Mat dst_gold;
     cv::subtract(mat1, mat2, dst_gold);
 
@@ -151,7 +137,7 @@ TEST_P(SubtractArray, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::subtract(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
+        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpuRes);
 
         gpuRes.download(dst);
     );
@@ -159,24 +145,8 @@ TEST_P(SubtractArray, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, SubtractArray, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32SC1, CV_32FC1)));
-
-struct SubtractScalar : ArithmTest {};
-
-TEST_P(SubtractScalar, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
-
-    PRINT_PARAM(val);
-    
+TEST_P(Subtract, Scalar) 
+{    
     cv::Mat dst_gold;
     cv::subtract(mat1, val, dst_gold);
 
@@ -185,29 +155,26 @@ TEST_P(SubtractScalar, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::subtract(cv::gpu::GpuMat(mat1), val, gpuRes);
+        cv::gpu::subtract(loadMat(mat1, useRoi), val, gpuRes);
 
         gpuRes.download(dst);
     );
 
-    ASSERT_LE(checkNorm(dst_gold, dst), 1e-5);
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, SubtractScalar, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1, CV_32FC2)));
+INSTANTIATE_TEST_CASE_P(Arithm, Subtract, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // multiply
 
-struct MultiplyArray : ArithmTest {};
+struct Multiply : ArithmTestBase {};
 
-TEST_P(MultiplyArray, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    
+TEST_P(Multiply, Array) 
+{    
     cv::Mat dst_gold;
     cv::multiply(mat1, mat2, dst_gold);
 
@@ -216,7 +183,7 @@ TEST_P(MultiplyArray, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::multiply(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
+        cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpuRes);
 
         gpuRes.download(dst);
     );
@@ -224,24 +191,8 @@ TEST_P(MultiplyArray, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, MultiplyArray, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32SC1, CV_32FC1)));
-
-struct MultiplyScalar : ArithmTest {};
-
-TEST_P(MultiplyScalar, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
-
-    PRINT_PARAM(val);
-    
+TEST_P(Multiply, Scalar) 
+{    
     cv::Mat dst_gold;
     cv::multiply(mat1, val, dst_gold);
 
@@ -250,7 +201,7 @@ TEST_P(MultiplyScalar, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::multiply(cv::gpu::GpuMat(mat1), val, gpuRes);
+        cv::gpu::multiply(loadMat(mat1, useRoi), val, gpuRes);
 
         gpuRes.download(dst);
     );
@@ -258,21 +209,18 @@ TEST_P(MultiplyScalar, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, MultiplyScalar, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(Arithm, Multiply, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // divide
 
-struct DivideArray : ArithmTest {};
+struct Divide : ArithmTestBase {};
 
-TEST_P(DivideArray, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    
+TEST_P(Divide, Array) 
+{    
     cv::Mat dst_gold;
     cv::divide(mat1, mat2, dst_gold);
 
@@ -281,7 +229,7 @@ TEST_P(DivideArray, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::divide(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
+        cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpuRes);
 
         gpuRes.download(dst);
     );
@@ -289,24 +237,8 @@ TEST_P(DivideArray, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, DivideArray, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32SC1, CV_32FC1)));
-
-struct DivideScalar : ArithmTest {};
-
-TEST_P(DivideScalar, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
-
-    PRINT_PARAM(val);
-    
+TEST_P(Divide, Scalar) 
+{    
     cv::Mat dst_gold;
     cv::divide(mat1, val, dst_gold);
 
@@ -315,7 +247,7 @@ TEST_P(DivideScalar, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::divide(cv::gpu::GpuMat(mat1), val, gpuRes);
+        cv::gpu::divide(loadMat(mat1, useRoi), val, gpuRes);
 
         gpuRes.download(dst);
     );
@@ -323,21 +255,18 @@ TEST_P(DivideScalar, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, DivideScalar, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(Arithm, Divide, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // transpose
 
-struct Transpose : ArithmTest {};
+struct Transpose : ArithmTestBase {};
 
 TEST_P(Transpose, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     cv::Mat dst_gold;
     cv::transpose(mat1, dst_gold);
 
@@ -346,7 +275,7 @@ TEST_P(Transpose, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::transpose(cv::gpu::GpuMat(mat1), gpuRes);
+        cv::gpu::transpose(loadMat(mat1, useRoi), gpuRes);
 
         gpuRes.download(dst);
     );
@@ -354,21 +283,18 @@ TEST_P(Transpose, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Transpose, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_8SC1, CV_8SC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_32FC1, CV_32FC2, CV_64FC1)));
+INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_8UC4, CV_8SC1, CV_8SC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_32FC1, CV_32FC2, CV_64FC1),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // absdiff
 
-struct AbsdiffArray : ArithmTest {};
+struct Absdiff : ArithmTestBase {};
 
-TEST_P(AbsdiffArray, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    
+TEST_P(Absdiff, Array) 
+{    
     cv::Mat dst_gold;
     cv::absdiff(mat1, mat2, dst_gold);
 
@@ -377,7 +303,7 @@ TEST_P(AbsdiffArray, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::absdiff(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
+        cv::gpu::absdiff(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpuRes);
 
         gpuRes.download(dst);
     );
@@ -385,24 +311,8 @@ TEST_P(AbsdiffArray, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, AbsdiffArray, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32SC1, CV_32FC1)));
-
-struct AbsdiffScalar : ArithmTest {};
-
-TEST_P(AbsdiffScalar, Accuracy) 
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
-
-    PRINT_PARAM(val);
-    
+TEST_P(Absdiff, Scalar) 
+{    
     cv::Mat dst_gold;
     cv::absdiff(mat1, val, dst_gold);
 
@@ -411,7 +321,7 @@ TEST_P(AbsdiffScalar, Accuracy)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::absdiff(cv::gpu::GpuMat(mat1), val, gpuRes);
+        cv::gpu::absdiff(loadMat(mat1, useRoi), val, gpuRes);
 
         gpuRes.download(dst);
     );
@@ -419,17 +329,20 @@ TEST_P(AbsdiffScalar, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, AbsdiffScalar, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // compare
 
-struct Compare : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > 
+PARAM_TEST_CASE(Compare, cv::gpu::DeviceInfo, MatType, CmpCode, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    int type;
     int cmp_code;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat1, mat2;
@@ -438,17 +351,19 @@ struct Compare : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, in
         
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        cmp_code = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        cmp_code = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
         
-        mat1 = cvtest::randomMat(rng, size, CV_32FC1, 1, 16, false);
-        mat2 = cvtest::randomMat(rng, size, CV_32FC1, 1, 16, false);
+        mat1 = randomMat(rng, size, type, 1, 16, false);
+        mat2 = randomMat(rng, size, type, 1, 16, false);
 
         cv::compare(mat1, mat2, dst_gold, cmp_code);
     }
@@ -456,19 +371,12 @@ struct Compare : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, in
 
 TEST_P(Compare, Accuracy) 
 {
-    static const char* cmp_codes[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
-    const char* cmpCodeStr = cmp_codes[cmp_code];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-    PRINT_PARAM(cmpCodeStr);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::compare(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes, cmp_code);
+        cv::gpu::compare(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpuRes, cmp_code);
 
         gpuRes.download(dst);
     );
@@ -476,16 +384,19 @@ TEST_P(Compare, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Compare, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values((int)cv::CMP_EQ, (int)cv::CMP_GT, (int)cv::CMP_GE, (int)cv::CMP_LT, (int)cv::CMP_LE, (int)cv::CMP_NE)));
+INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_16UC1, CV_32SC1),
+                        Values((int) cv::CMP_EQ, (int) cv::CMP_GT, (int) cv::CMP_GE, (int) cv::CMP_LT, (int) cv::CMP_LE, (int) cv::CMP_NE),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // meanStdDev
 
-struct MeanStdDev : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(MeanStdDev, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
@@ -495,15 +406,16 @@ struct MeanStdDev : testing::TestWithParam<cv::gpu::DeviceInfo>
 
     virtual void SetUp() 
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
         
-        mat = cvtest::randomMat(rng, size, CV_8UC1, 1, 255, false);
+        mat = randomMat(rng, size, CV_8UC1, 1, 255, false);
 
         cv::meanStdDev(mat, mean_gold, stddev_gold);
     }
@@ -511,14 +423,11 @@ struct MeanStdDev : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(MeanStdDev, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Scalar mean;
     cv::Scalar stddev;
     
     ASSERT_NO_THROW(
-        cv::gpu::meanStdDev(cv::gpu::GpuMat(mat), mean, stddev);
+        cv::gpu::meanStdDev(loadMat(mat, useRoi), mean, stddev);
     );
 
     EXPECT_NEAR(mean_gold[0], mean[0], 1e-5);
@@ -532,18 +441,18 @@ TEST_P(MeanStdDev, Accuracy)
     EXPECT_NEAR(stddev_gold[3], stddev[3], 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // normDiff
 
-static const int norms[] = {cv::NORM_INF, cv::NORM_L1, cv::NORM_L2};
-static const char* norms_str[] = {"NORM_INF", "NORM_L1", "NORM_L2"};
-
-struct NormDiff : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(NormDiff, cv::gpu::DeviceInfo, NormCode, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
-    int normIdx;
+    int normCode;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat1, mat2;
@@ -552,51 +461,48 @@ struct NormDiff : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, i
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        normIdx = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        normCode = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
         
-        mat1 = cvtest::randomMat(rng, size, CV_8UC1, 1, 255, false);
-        mat2 = cvtest::randomMat(rng, size, CV_8UC1, 1, 255, false);
+        mat1 = randomMat(rng, size, CV_8UC1, 1, 255, false);
+        mat2 = randomMat(rng, size, CV_8UC1, 1, 255, false);
 
-        norm_gold = cv::norm(mat1, mat2, norms[normIdx]);
+        norm_gold = cv::norm(mat1, mat2, normCode);
     }
 };
 
 TEST_P(NormDiff, Accuracy) 
-{
-    const char* normStr = norms_str[normIdx];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-    PRINT_PARAM(normStr);
-    
+{    
     double norm;
     
     ASSERT_NO_THROW(
-        norm = cv::gpu::norm(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), norms[normIdx]);
+        norm = cv::gpu::norm(loadMat(mat1, useRoi), loadMat(mat2, useRoi), normCode);
     );
 
     EXPECT_NEAR(norm_gold, norm, 1e-6);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, NormDiff, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Range(0, 3)));
+INSTANTIATE_TEST_CASE_P(Arithm, NormDiff, Combine(
+                        ALL_DEVICES,
+                        Values((int) cv::NORM_INF, (int) cv::NORM_L1, (int) cv::NORM_L2),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // flip
 
-struct Flip : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(Flip, cv::gpu::DeviceInfo, MatType, FlipCode, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int flip_code;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
@@ -605,38 +511,31 @@ struct Flip : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        flip_code = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        flip_code = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
         
-        mat = cvtest::randomMat(rng, size, type, 1, 255, false);
+        mat = randomMat(rng, size, type, 1, 255, false);
 
         cv::flip(mat, dst_gold, flip_code);
     }
 };
 
 TEST_P(Flip, Accuracy) 
-{
-    static const char* flip_axis[] = {"Both", "X", "Y"};
-    const char* flipAxisStr = flip_axis[flip_code + 1];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    PRINT_PARAM(flipAxisStr);
-    
+{    
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpu_res;
 
-        cv::gpu::flip(cv::gpu::GpuMat(mat), gpu_res, flip_code);
+        cv::gpu::flip(loadMat(mat, useRoi), gpu_res, flip_code);
 
         gpu_res.download(dst);
     );
@@ -644,18 +543,20 @@ TEST_P(Flip, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Flip, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC4),
-                        testing::Values(0, 1, -1)));
+INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_8UC4),
+                        Values((int)FLIP_BOTH, (int)FLIP_X, (int)FLIP_Y),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // LUT
 
-struct LUT : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(LUT, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
@@ -665,17 +566,18 @@ struct LUT : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
         
-        mat = cvtest::randomMat(rng, size, type, 1, 255, false);
-        lut = cvtest::randomMat(rng, cv::Size(256, 1), CV_8UC1, 100, 200, false);
+        mat = randomMat(rng, size, type, 1, 255, false);
+        lut = randomMat(rng, cv::Size(256, 1), CV_8UC1, 100, 200, false);
 
         cv::LUT(mat, lut, dst_gold);
     }
@@ -683,16 +585,12 @@ struct LUT : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
 
 TEST_P(LUT, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpu_res;
 
-        cv::gpu::LUT(cv::gpu::GpuMat(mat), lut, gpu_res);
+        cv::gpu::LUT(loadMat(mat, useRoi), lut, gpu_res);
 
         gpu_res.download(dst);
     );
@@ -700,16 +598,18 @@ TEST_P(LUT, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, LUT, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC3)));
+INSTANTIATE_TEST_CASE_P(Arithm, LUT, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_8UC3),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // exp
 
-struct Exp : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
@@ -718,15 +618,16 @@ struct Exp : testing::TestWithParam<cv::gpu::DeviceInfo>
 
     virtual void SetUp() 
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mat = cvtest::randomMat(rng, size, CV_32FC1, -10.0, 2.0, false);        
+        mat = randomMat(rng, size, CV_32FC1, -10.0, 2.0, false);        
 
         cv::exp(mat, dst_gold);
     }
@@ -734,15 +635,12 @@ struct Exp : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(Exp, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpu_res;
 
-        cv::gpu::exp(cv::gpu::GpuMat(mat), gpu_res);
+        cv::gpu::exp(loadMat(mat, useRoi), gpu_res);
 
         gpu_res.download(dst);
     );
@@ -750,17 +648,18 @@ TEST_P(Exp, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Exp, testing::ValuesIn(devices()));
-
-
+INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // pow
 
-struct Pow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(Pow, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     double power;
     cv::Size size;
@@ -769,18 +668,18 @@ struct Pow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
     cv::Mat dst_gold;
 
     virtual void SetUp() 
-    {        
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());        
+    {
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
-        //size = cv::Size(2, 2);
 
-        mat = cvtest::randomMat(rng, size, type, 0.0, 100.0, false);        
+        mat = randomMat(rng, size, type, 0.0, 100.0, false);        
 
         if (mat.depth() == CV_32F)
             power = rng.uniform(1.2f, 3.f);
@@ -789,43 +688,38 @@ struct Pow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
             int ipower = rng.uniform(2, 8);
             power = (float)ipower;
         }
+
         cv::pow(mat, power, dst_gold);
     }
 };
 
 TEST_P(Pow, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    PRINT_PARAM(power);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpu_res;
 
-        cv::gpu::pow(cv::gpu::GpuMat(mat), power, gpu_res);
+        cv::gpu::pow(loadMat(mat, useRoi), power, gpu_res);
 
         gpu_res.download(dst);
     );
 
-    /*std::cout  << mat << std::endl << std::endl;
-    std::cout  << dst << std::endl << std::endl;
-    std::cout  << dst_gold << std::endl;*/
     EXPECT_MAT_NEAR(dst_gold, dst, 2);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Pow, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_32F, CV_32FC3)));
+INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(
+                        ALL_DEVICES,
+                        Values(CV_32F, CV_32FC3),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // log
 
-struct Log : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
@@ -834,15 +728,16 @@ struct Log : testing::TestWithParam<cv::gpu::DeviceInfo>
 
     virtual void SetUp() 
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mat = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);        
+        mat = randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);        
 
         cv::log(mat, dst_gold);
     }
@@ -850,15 +745,12 @@ struct Log : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(Log, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpu_res;
 
-        cv::gpu::log(cv::gpu::GpuMat(mat), gpu_res);
+        cv::gpu::log(loadMat(mat, useRoi), gpu_res);
 
         gpu_res.download(dst);
     );
@@ -866,14 +758,17 @@ TEST_P(Log, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Log, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // magnitude
 
-struct Magnitude : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(Magnitude, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat1, mat2;
@@ -882,16 +777,17 @@ struct Magnitude : testing::TestWithParam<cv::gpu::DeviceInfo>
 
     virtual void SetUp() 
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mat1 = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
-        mat2 = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);       
+        mat1 = randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
+        mat2 = randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);       
 
         cv::magnitude(mat1, mat2, dst_gold);
     }
@@ -899,15 +795,12 @@ struct Magnitude : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(Magnitude, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpu_res;
 
-        cv::gpu::magnitude(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpu_res);
+        cv::gpu::magnitude(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpu_res);
 
         gpu_res.download(dst);
     );
@@ -915,14 +808,17 @@ TEST_P(Magnitude, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // phase
 
-struct Phase : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(Phase, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat1, mat2;
@@ -931,16 +827,17 @@ struct Phase : testing::TestWithParam<cv::gpu::DeviceInfo>
 
     virtual void SetUp() 
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mat1 = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
-        mat2 = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);       
+        mat1 = randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
+        mat2 = randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);       
 
         cv::phase(mat1, mat2, dst_gold);
     }
@@ -948,15 +845,12 @@ struct Phase : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(Phase, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpu_res;
 
-        cv::gpu::phase(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpu_res);
+        cv::gpu::phase(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpu_res);
 
         gpu_res.download(dst);
     );
@@ -964,14 +858,17 @@ TEST_P(Phase, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Phase, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // cartToPolar
 
-struct CartToPolar : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(CartToPolar, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat1, mat2;
@@ -981,16 +878,17 @@ struct CartToPolar : testing::TestWithParam<cv::gpu::DeviceInfo>
 
     virtual void SetUp() 
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mat1 = cvtest::randomMat(rng, size, CV_32FC1, -100.0, 100.0, false);
-        mat2 = cvtest::randomMat(rng, size, CV_32FC1, -100.0, 100.0, false);       
+        mat1 = randomMat(rng, size, CV_32FC1, -100.0, 100.0, false);
+        mat2 = randomMat(rng, size, CV_32FC1, -100.0, 100.0, false);       
 
         cv::cartToPolar(mat1, mat2, mag_gold, angle_gold);
     }
@@ -998,16 +896,13 @@ struct CartToPolar : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(CartToPolar, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat mag, angle;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuMag;
         cv::gpu::GpuMat gpuAngle;
 
-        cv::gpu::cartToPolar(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuMag, gpuAngle);
+        cv::gpu::cartToPolar(loadMat(mat1, useRoi), loadMat(mat2, useRoi), gpuMag, gpuAngle);
 
         gpuMag.download(mag);
         gpuAngle.download(angle);
@@ -1017,14 +912,17 @@ TEST_P(CartToPolar, Accuracy)
     EXPECT_MAT_NEAR(angle_gold, angle, 1e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // polarToCart
 
-struct PolarToCart : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(PolarToCart, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mag;
@@ -1035,16 +933,17 @@ struct PolarToCart : testing::TestWithParam<cv::gpu::DeviceInfo>
 
     virtual void SetUp() 
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mag = cvtest::randomMat(rng, size, CV_32FC1, -100.0, 100.0, false);
-        angle = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 2.0 * CV_PI, false);       
+        mag = randomMat(rng, size, CV_32FC1, -100.0, 100.0, false);
+        angle = randomMat(rng, size, CV_32FC1, 0.0, 2.0 * CV_PI, false);       
 
         cv::polarToCart(mag, angle, x_gold, y_gold);
     }
@@ -1052,16 +951,13 @@ struct PolarToCart : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(PolarToCart, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat x, y;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuX;
         cv::gpu::GpuMat gpuY;
 
-        cv::gpu::polarToCart(cv::gpu::GpuMat(mag), cv::gpu::GpuMat(angle), gpuX, gpuY);
+        cv::gpu::polarToCart(loadMat(mag, useRoi), loadMat(angle, useRoi), gpuX, gpuY);
 
         gpuX.download(x);
         gpuY.download(y);
@@ -1071,15 +967,18 @@ TEST_P(PolarToCart, Accuracy)
     EXPECT_MAT_NEAR(y_gold, y, 1e-4);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // minMax
 
-struct MinMax : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(MinMax, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
@@ -1090,17 +989,18 @@ struct MinMax : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mat = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
-        mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false);
+        mat = randomMat(rng, size, type, 0.0, 127.0, false);
+        mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
 
         if (type != CV_8S)
         {
@@ -1131,34 +1031,32 @@ struct MinMax : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int
 
 TEST_P(MinMax, Accuracy) 
 {
     if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     double minVal, maxVal;
     
     ASSERT_NO_THROW(
-        cv::gpu::minMax(cv::gpu::GpuMat(mat), &minVal, &maxVal, cv::gpu::GpuMat(mask));
+        cv::gpu::minMax(loadMat(mat, useRoi), &minVal, &maxVal, loadMat(mask, useRoi));
     );
 
     EXPECT_DOUBLE_EQ(minVal_gold, minVal);
     EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, MinMax, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // minMaxLoc
 
-struct MinMaxLoc : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(MinMaxLoc, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
@@ -1171,17 +1069,18 @@ struct MinMaxLoc : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mat = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
-        mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false);
+        mat = randomMat(rng, size, type, 0.0, 127.0, false);
+        mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
 
         if (type != CV_8S)
         {
@@ -1215,15 +1114,11 @@ TEST_P(MinMaxLoc, Accuracy)
     if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     double minVal, maxVal;
     cv::Point minLoc, maxLoc;
     
     ASSERT_NO_THROW(
-        cv::gpu::minMaxLoc(cv::gpu::GpuMat(mat), &minVal, &maxVal, &minLoc, &maxLoc, cv::gpu::GpuMat(mask));
+        cv::gpu::minMaxLoc(loadMat(mat, useRoi), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask, useRoi));
     );
 
     EXPECT_DOUBLE_EQ(minVal_gold, minVal);
@@ -1240,17 +1135,19 @@ TEST_P(MinMaxLoc, Accuracy)
     EXPECT_EQ(0, cmpMaxVals);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////
 // countNonZero
 
-struct CountNonZero : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(CountNonZero, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
@@ -1259,16 +1156,17 @@ struct CountNonZero : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInf
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        cv::Mat matBase = cvtest::randomMat(rng, size, CV_8U, 0.0, 1.0, false);
+        cv::Mat matBase = randomMat(rng, size, CV_8U, 0.0, 1.0, false);
         matBase.convertTo(mat, type);
 
         n_gold = cv::countNonZero(mat);
@@ -1280,66 +1178,59 @@ TEST_P(CountNonZero, Accuracy)
     if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     int n;
     
     ASSERT_NO_THROW(
-        n = cv::gpu::countNonZero(cv::gpu::GpuMat(mat));
+        n = cv::gpu::countNonZero(loadMat(mat, useRoi));
     );
 
     ASSERT_EQ(n_gold, n);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
+                        USE_ROI));
 
 //////////////////////////////////////////////////////////////////////////////
 // sum
 
-struct Sum : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(Sum, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat;
 
-    cv::Scalar sum_gold;
-
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        mat = cvtest::randomMat(rng, size, CV_8U, 0.0, 10.0, false);
-
-        sum_gold = cv::sum(mat);
+        randomMat(rng, size, CV_8U, 0.0, 10.0, false).convertTo(mat, type);
     }
 };
 
-TEST_P(Sum, Accuracy) 
+TEST_P(Sum, Simple) 
 {
     if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
+    cv::Scalar sum_gold = cv::sum(mat);
 
     cv::Scalar sum;
     
     ASSERT_NO_THROW(
-        sum = cv::gpu::sum(cv::gpu::GpuMat(mat));
+        sum = cv::gpu::sum(loadMat(mat, useRoi));
     );
 
     EXPECT_NEAR(sum[0], sum_gold[0], mat.size().area() * 1e-5);
@@ -1348,50 +1239,17 @@ TEST_P(Sum, Accuracy)
     EXPECT_NEAR(sum[3], sum_gold[3], mat.size().area() * 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Sum, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
-
-struct AbsSum : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
-{
-    cv::gpu::DeviceInfo devInfo;
-    int type;
-
-    cv::Size size;
-    cv::Mat mat;
-
-    cv::Scalar sum_gold;
-
-    virtual void SetUp() 
-    {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
-
-        mat = cvtest::randomMat(rng, size, CV_8U, 0.0, 10.0, false);
-
-        sum_gold = cv::norm(mat, cv::NORM_L1);
-    }
-};
-
-TEST_P(AbsSum, Accuracy) 
+TEST_P(Sum, Abs) 
 {
     if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
+    cv::Scalar sum_gold = cv::norm(mat, cv::NORM_L1);
 
     cv::Scalar sum;
     
     ASSERT_NO_THROW(
-        sum = cv::gpu::absSum(cv::gpu::GpuMat(mat));
+        sum = cv::gpu::absSum(loadMat(mat, useRoi));
     );
 
     EXPECT_NEAR(sum[0], sum_gold[0], mat.size().area() * 1e-5);
@@ -1400,52 +1258,19 @@ TEST_P(AbsSum, Accuracy)
     EXPECT_NEAR(sum[3], sum_gold[3], mat.size().area() * 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, AbsSum, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
-
-struct SqrSum : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
-{
-    cv::gpu::DeviceInfo devInfo;
-    int type;
-
-    cv::Size size;
-    cv::Mat mat;
-
-    cv::Scalar sum_gold;
-
-    virtual void SetUp() 
-    {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
-
-        mat = cvtest::randomMat(rng, size, CV_8U, 0.0, 10.0, false);
- 
-        cv::Mat sqrmat;
-        cv::multiply(mat, mat, sqrmat);
-        sum_gold = cv::sum(sqrmat);
-    }
-};
-
-TEST_P(SqrSum, Accuracy) 
+TEST_P(Sum, Sqr) 
 {
     if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
+    cv::Mat sqrmat;
+    cv::multiply(mat, mat, sqrmat);
+    cv::Scalar sum_gold = cv::sum(sqrmat);
 
     cv::Scalar sum;
     
     ASSERT_NO_THROW(
-        sum = cv::gpu::sqrSum(cv::gpu::GpuMat(mat));
+        sum = cv::gpu::sqrSum(loadMat(mat, useRoi));
     );
 
     EXPECT_NEAR(sum[0], sum_gold[0], mat.size().area() * 1e-5);
@@ -1454,73 +1279,15 @@ TEST_P(SqrSum, Accuracy)
     EXPECT_NEAR(sum[3], sum_gold[3], mat.size().area() * 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, SqrSum, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F),
+                        USE_ROI));
 
 //////////////////////////////////////////////////////////////////////////////
 // bitwise
 
-struct BitwiseNot : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
-{
-    cv::gpu::DeviceInfo devInfo;
-    int type;
-
-    cv::Size size;
-    cv::Mat mat;
-
-    cv::Mat dst_gold;
-
-    virtual void SetUp() 
-    {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
-
-        mat.create(size, type);
-        
-        for (int i = 0; i < mat.rows; ++i)
-        {
-            cv::Mat row(1, static_cast<int>(mat.cols * mat.elemSize()), CV_8U, (void*)mat.ptr(i));
-            rng.fill(row, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
-        }
-
-        dst_gold = ~mat;
-    }
-};
-
-TEST_P(BitwiseNot, Accuracy) 
-{
-    if (mat.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-        return;
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    cv::Mat dst;
-    
-    ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_dst;
-
-        cv::gpu::bitwise_not(cv::gpu::GpuMat(mat), dev_dst);
-
-        dev_dst.download(dst);
-    );
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseNot, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::ValuesIn(all_types())));
-
-struct BitwiseOr : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(Bitwise, cv::gpu::DeviceInfo, MatType)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
@@ -1529,12 +1296,10 @@ struct BitwiseOr : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
     cv::Mat mat1;
     cv::Mat mat2;
 
-    cv::Mat dst_gold;
-
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
@@ -1553,26 +1318,22 @@ struct BitwiseOr : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
             cv::Mat row2(1, static_cast<int>(mat2.cols * mat2.elemSize()), CV_8U, (void*)mat2.ptr(i));
             rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
         }
-
-        dst_gold = mat1 | mat2;
     }
 };
 
-TEST_P(BitwiseOr, Accuracy) 
+TEST_P(Bitwise, Not) 
 {
     if (mat1.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
+    cv::Mat dst_gold = ~mat1;
 
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
 
-        cv::gpu::bitwise_or(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), dev_dst);
+        cv::gpu::bitwise_not(loadMat(mat1), dev_dst);
 
         dev_dst.download(dst);
     );
@@ -1580,63 +1341,19 @@ TEST_P(BitwiseOr, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseOr, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::ValuesIn(all_types())));
-
-struct BitwiseAnd : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
-{
-    cv::gpu::DeviceInfo devInfo;
-    int type;
-
-    cv::Size size;
-    cv::Mat mat1;
-    cv::Mat mat2;
-
-    cv::Mat dst_gold;
-
-    virtual void SetUp() 
-    {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
-
-        mat1.create(size, type);
-        mat2.create(size, type);
-        
-        for (int i = 0; i < mat1.rows; ++i)
-        {
-            cv::Mat row1(1, static_cast<int>(mat1.cols * mat1.elemSize()), CV_8U, (void*)mat1.ptr(i));
-            rng.fill(row1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
-
-            cv::Mat row2(1, static_cast<int>(mat2.cols * mat2.elemSize()), CV_8U, (void*)mat2.ptr(i));
-            rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
-        }
-
-        dst_gold = mat1 & mat2;
-    }
-};
-
-TEST_P(BitwiseAnd, Accuracy) 
+TEST_P(Bitwise, Or) 
 {
     if (mat1.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
+    cv::Mat dst_gold = mat1 | mat2;
 
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
 
-        cv::gpu::bitwise_and(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), dev_dst);
+        cv::gpu::bitwise_or(loadMat(mat1), loadMat(mat2), dev_dst);
 
         dev_dst.download(dst);
     );
@@ -1644,63 +1361,19 @@ TEST_P(BitwiseAnd, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseAnd, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::ValuesIn(all_types())));
-
-struct BitwiseXor : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
-{
-    cv::gpu::DeviceInfo devInfo;
-    int type;
-
-    cv::Size size;
-    cv::Mat mat1;
-    cv::Mat mat2;
-
-    cv::Mat dst_gold;
-
-    virtual void SetUp() 
-    {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
-        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
-
-        mat1.create(size, type);
-        mat2.create(size, type);
-        
-        for (int i = 0; i < mat1.rows; ++i)
-        {
-            cv::Mat row1(1, static_cast<int>(mat1.cols * mat1.elemSize()), CV_8U, (void*)mat1.ptr(i));
-            rng.fill(row1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
-
-            cv::Mat row2(1, static_cast<int>(mat2.cols * mat2.elemSize()), CV_8U, (void*)mat2.ptr(i));
-            rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
-        }
-
-        dst_gold = mat1 ^ mat2;
-    }
-};
-
-TEST_P(BitwiseXor, Accuracy) 
+TEST_P(Bitwise, And) 
 {
     if (mat1.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
+    cv::Mat dst_gold = mat1 & mat2;
 
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
 
-        cv::gpu::bitwise_xor(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), dev_dst);
+        cv::gpu::bitwise_and(loadMat(mat1), loadMat(mat2), dev_dst);
 
         dev_dst.download(dst);
     );
@@ -1708,19 +1381,40 @@ TEST_P(BitwiseXor, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseXor, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::ValuesIn(all_types())));
+TEST_P(Bitwise, Xor) 
+{
+    if (mat1.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // CV_64F input and supportFeature() reports no NATIVE_DOUBLE: nothing is checked
+    // Checks cv::gpu::bitwise_xor against the host-side reference mat1 ^ mat2.
+    cv::Mat dst_gold = mat1 ^ mat2;
+
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+
+        cv::gpu::bitwise_xor(loadMat(mat1), loadMat(mat2), dev_dst);
+
+        dev_dst.download(dst);
+    );
+    // The downloaded result is compared with the reference using a tolerance of 0.0.
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise, Combine(
+                        ALL_DEVICES,
+                        ALL_TYPES));
 
 //////////////////////////////////////////////////////////////////////////////
 // addWeighted
 
-struct AddWeighted : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int, int> >
+PARAM_TEST_CASE(AddWeighted, cv::gpu::DeviceInfo, MatType, MatType, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type1;
     int type2;
     int dtype;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src1;
@@ -1733,19 +1427,20 @@ struct AddWeighted : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type1 = std::tr1::get<1>(GetParam());
-        type2 = std::tr1::get<2>(GetParam());
-        dtype = std::tr1::get<3>(GetParam());
+        devInfo = GET_PARAM(0);
+        type1 = GET_PARAM(1);
+        type2 = GET_PARAM(2);
+        dtype = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        src1 = cvtest::randomMat(rng, size, type1, 0.0, 255.0, false);
-        src2 = cvtest::randomMat(rng, size, type2, 0.0, 255.0, false);
+        src1 = randomMat(rng, size, type1, 0.0, 255.0, false);
+        src2 = randomMat(rng, size, type2, 0.0, 255.0, false);
 
         alpha = rng.uniform(-10.0, 10.0);
         beta = rng.uniform(-10.0, 10.0);
@@ -1760,21 +1455,12 @@ TEST_P(AddWeighted, Accuracy)
     if ((src1.depth() == CV_64F || src2.depth() == CV_64F || dst_gold.depth() == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type1);
-    PRINT_TYPE(type2);
-    PRINT_TYPE(dtype);
-    PRINT_PARAM(size);
-    PRINT_PARAM(alpha);
-    PRINT_PARAM(beta);
-    PRINT_PARAM(gamma);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
 
-        cv::gpu::addWeighted(cv::gpu::GpuMat(src1), alpha, cv::gpu::GpuMat(src2), beta, gamma, dev_dst, dtype);
+        cv::gpu::addWeighted(loadMat(src1, useRoi), alpha, loadMat(src2, useRoi), beta, gamma, dev_dst, dtype);
 
         dev_dst.download(dst);
     );
@@ -1782,21 +1468,23 @@ TEST_P(AddWeighted, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, dtype < CV_32F ? 1.0 : 1e-12);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1)),
-                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1)),
-                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1))));
+INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(
+                        ALL_DEVICES,
+                        TYPES(CV_8U, CV_64F, 1, 1),
+                        TYPES(CV_8U, CV_64F, 1, 1),
+                        TYPES(CV_8U, CV_64F, 1, 1),
+                        USE_ROI));
 
 //////////////////////////////////////////////////////////////////////////////
 // reduce
 
-struct Reduce : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int, int> >
+PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, MatType, int, ReduceOp, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int dim;
-    int reduceOp;
+    int reduceOp;    
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
@@ -1805,18 +1493,19 @@ struct Reduce : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        dim = std::tr1::get<2>(GetParam());
-        reduceOp = std::tr1::get<3>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        dim = GET_PARAM(2);
+        reduceOp = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 400), rng.uniform(100, 400));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 255.0, false);
+        src = randomMat(rng, size, type, 0.0, 255.0, false);
 
         cv::reduce(src, dst_gold, dim, reduceOp, reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG ? CV_32F : CV_MAT_DEPTH(type));
 
@@ -1831,21 +1520,12 @@ struct Reduce : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int
 
 TEST_P(Reduce, Accuracy) 
 {
-    static const char* reduceOpStrs[] = {"CV_REDUCE_SUM", "CV_REDUCE_AVG", "CV_REDUCE_MAX", "CV_REDUCE_MIN"};
-    const char* reduceOpStr = reduceOpStrs[reduceOp];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(dim);
-    PRINT_PARAM(reduceOpStr);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
 
-        cv::gpu::reduce(cv::gpu::GpuMat(src), dev_dst, dim, reduceOp, reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG ? CV_32F : CV_MAT_DEPTH(type));
+        cv::gpu::reduce(loadMat(src, useRoi), dev_dst, dim, reduceOp, reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG ? CV_32F : CV_MAT_DEPTH(type));
 
         dev_dst.download(dst);
     );
@@ -1854,20 +1534,22 @@ TEST_P(Reduce, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, norm);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, Reduce, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values(0, 1),
-                        testing::Values((int)CV_REDUCE_SUM, (int)CV_REDUCE_AVG, (int)CV_REDUCE_MAX, (int)CV_REDUCE_MIN)));
+INSTANTIATE_TEST_CASE_P(Arithm, Reduce, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                        Values(0, 1),
+                        Values((int)CV_REDUCE_SUM, (int)CV_REDUCE_AVG, (int)CV_REDUCE_MAX, (int)CV_REDUCE_MIN),
+                        USE_ROI));
 
 //////////////////////////////////////////////////////////////////////////////
 // gemm
 
-struct GEMM : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(GEMM, cv::gpu::DeviceInfo, MatType, GemmFlags, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int flags;
+    bool useRoi;
 
     int size;
     cv::Mat src1;
@@ -1880,19 +1562,20 @@ struct GEMM : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        flags = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        flags = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
-        size = rng.uniform(100, 500);
+        size = rng.uniform(100, 200);
 
-        src1 = cvtest::randomMat(rng, cv::Size(size, size), type, -10.0, 10.0, false);
-        src2 = cvtest::randomMat(rng, cv::Size(size, size), type, -10.0, 10.0, false);
-        src3 = cvtest::randomMat(rng, cv::Size(size, size), type, -10.0, 10.0, false);
+        src1 = randomMat(rng, cv::Size(size, size), type, -10.0, 10.0, false);
+        src2 = randomMat(rng, cv::Size(size, size), type, -10.0, 10.0, false);
+        src3 = randomMat(rng, cv::Size(size, size), type, -10.0, 10.0, false);
         alpha = rng.uniform(-10.0, 10.0);
         beta = rng.uniform(-10.0, 10.0);
 
@@ -1902,16 +1585,12 @@ struct GEMM : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
 TEST_P(GEMM, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(flags);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
 
-        cv::gpu::gemm(cv::gpu::GpuMat(src1), cv::gpu::GpuMat(src2), alpha, cv::gpu::GpuMat(src3), beta, dev_dst, flags);
+        cv::gpu::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dev_dst, flags);
 
         dev_dst.download(dst);
     );
@@ -1919,9 +1598,10 @@ TEST_P(GEMM, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-1);
 }
 
-INSTANTIATE_TEST_CASE_P(Arithm, GEMM, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_32FC1, CV_32FC2),
-                        testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)cv::GEMM_3_T)));
+INSTANTIATE_TEST_CASE_P(Arithm, GEMM, Combine(
+                        ALL_DEVICES,
+                        Values(CV_32FC1, CV_32FC2),
+                        Values(0, (int) cv::GEMM_1_T, (int) cv::GEMM_2_T, (int) cv::GEMM_3_T),
+                        USE_ROI));
 
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_calib3d.cpp b/modules/gpu/test/test_calib3d.cpp
index d6fe168c3..919ec900a 100644
--- a/modules/gpu/test/test_calib3d.cpp
+++ b/modules/gpu/test/test_calib3d.cpp
@@ -43,10 +43,13 @@
 
 #ifdef HAVE_CUDA
 
+using namespace cvtest;
+using namespace testing;
+
 //////////////////////////////////////////////////////////////////////////
 // BlockMatching
 
-struct StereoBlockMatching : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct StereoBlockMatching : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::Mat img_l;
     cv::Mat img_r;
@@ -71,9 +74,7 @@ struct StereoBlockMatching : testing::TestWithParam<cv::gpu::DeviceInfo>
 };
 
 TEST_P(StereoBlockMatching, Regression) 
-{
-    PRINT_PARAM(devInfo);
-    
+{    
     cv::Mat disp;
 
     ASSERT_NO_THROW(
@@ -90,12 +91,12 @@ TEST_P(StereoBlockMatching, Regression)
     EXPECT_MAT_NEAR(img_template, disp, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoBlockMatching, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBlockMatching, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////////
 // BeliefPropagation
 
-struct StereoBeliefPropagation : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct StereoBeliefPropagation : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::Mat img_l;
     cv::Mat img_r;
@@ -121,8 +122,6 @@ struct StereoBeliefPropagation : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(StereoBeliefPropagation, Regression) 
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat disp;
 
     ASSERT_NO_THROW(
@@ -139,12 +138,12 @@ TEST_P(StereoBeliefPropagation, Regression)
     EXPECT_MAT_NEAR(img_template, disp, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////////
 // ConstantSpaceBP
 
-struct StereoConstantSpaceBP : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct StereoConstantSpaceBP : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::Mat img_l;
     cv::Mat img_r;
@@ -174,8 +173,6 @@ struct StereoConstantSpaceBP : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(StereoConstantSpaceBP, Regression) 
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat disp;
 
     ASSERT_NO_THROW(
@@ -192,12 +189,12 @@ TEST_P(StereoConstantSpaceBP, Regression)
     EXPECT_MAT_NEAR(img_template, disp, 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // projectPoints
 
-struct ProjectPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct ProjectPoints : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
     
@@ -231,8 +228,6 @@ struct ProjectPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(ProjectPoints, Accuracy) 
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(   
@@ -257,12 +252,12 @@ TEST_P(ProjectPoints, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, ALL_DEVICES);
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // transformPoints
 
-struct TransformPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct TransformPoints : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
 
@@ -289,8 +284,6 @@ struct TransformPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(TransformPoints, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
@@ -318,12 +311,12 @@ TEST_P(TransformPoints, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, ALL_DEVICES);
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // solvePnPRansac
 
-struct SolvePnPRansac : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct SolvePnPRansac : TestWithParam<cv::gpu::DeviceInfo>
 {
     static const int num_points = 5000;
 
@@ -360,8 +353,6 @@ struct SolvePnPRansac : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(SolvePnPRansac, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat rvec, tvec;
     std::vector<int> inliers;
 
@@ -374,6 +365,6 @@ TEST_P(SolvePnPRansac, Accuracy)
     ASSERT_LE(cv::norm(tvec - tvec_gold), 1e-3f);
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, ALL_DEVICES);
 
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpu/test/test_features2d.cpp
index 14c83a0e7..deeb72c03 100644
--- a/modules/gpu/test/test_features2d.cpp
+++ b/modules/gpu/test/test_features2d.cpp
@@ -43,17 +43,53 @@
 
 #ifdef HAVE_CUDA
 
+using namespace cvtest;
+using namespace testing;
+
+int getValidMatchesCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
+{
+    // Returns how many matches pair a keypoints1[queryIdx] with a keypoints2[trainIdx] that agree within the tolerances below.
+    int validCount = 0;
+    for (size_t i = 0; i < matches.size(); ++i)
+    {
+        const cv::DMatch& m = matches[i];
+
+        const cv::KeyPoint& p1 = keypoints1[m.queryIdx];
+        const cv::KeyPoint& p2 = keypoints2[m.trainIdx];
+
+        const float maxPtDif = 1.f;
+        const float maxSizeDif = 1.f;
+        const float maxAngleDif = 2.f;
+        const float maxResponseDif = 0.1f;
+
+        float dist = (float) cv::norm(p1.pt - p2.pt);
+        // fabs, not abs: unqualified abs may bind to int abs(int) and truncate the float difference before the comparison.
+        if (dist < maxPtDif &&
+            fabs(p1.size - p2.size) < maxSizeDif &&
+            fabs(p1.angle - p2.angle) < maxAngleDif &&
+            fabs(p1.response - p2.response) < maxResponseDif &&
+            p1.octave == p2.octave &&
+            p1.class_id == p2.class_id)
+        {
+            ++validCount;
+        }
+    }
+
+    return validCount;
+}
+
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // SURF
 
-struct SURF : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct SURF : TestWithParam<cv::gpu::DeviceInfo>
 {
+    cv::gpu::DeviceInfo devInfo;
+
     cv::Mat image;
     cv::Mat mask;
+
     std::vector<cv::KeyPoint> keypoints_gold;
     std::vector<float> descriptors_gold;
-
-    cv::gpu::DeviceInfo devInfo;
     
     virtual void SetUp()
     {
@@ -67,15 +103,14 @@ struct SURF : testing::TestWithParam<cv::gpu::DeviceInfo>
         mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar::all(1));
         mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
                 
-        cv::SURF fdetector_gold; fdetector_gold.extended = false;
+        cv::SURF fdetector_gold; 
+        fdetector_gold.extended = false;
         fdetector_gold(image, mask, keypoints_gold, descriptors_gold);        
     }
 };
 
 TEST_P(SURF, EmptyDataTest)
 {
-    PRINT_PARAM(devInfo);
-
     cv::gpu::SURF_GPU fdetector;
 
     cv::gpu::GpuMat image;
@@ -92,9 +127,6 @@ TEST_P(SURF, EmptyDataTest)
 
 TEST_P(SURF, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-
-    // Compute keypoints.
     std::vector<cv::KeyPoint> keypoints;
     cv::Mat descriptors;
 
@@ -102,7 +134,7 @@ TEST_P(SURF, Accuracy)
         cv::gpu::GpuMat dev_descriptors;
         cv::gpu::SURF_GPU fdetector; fdetector.extended = false;
 
-        fdetector(cv::gpu::GpuMat(image), cv::gpu::GpuMat(mask), keypoints, dev_descriptors);
+        fdetector(loadMat(image), loadMat(mask), keypoints, dev_descriptors);
 
         dev_descriptors.download(descriptors);
     );
@@ -112,45 +144,19 @@ TEST_P(SURF, Accuracy)
 
     matcher.match(cv::Mat(static_cast<int>(keypoints_gold.size()), 64, CV_32FC1, &descriptors_gold[0]), descriptors, matches);
 
-    int validCount = 0;
-    
-    for (size_t i = 0; i < matches.size(); ++i)
-    {
-        const cv::DMatch& m = matches[i];
+    int validCount = getValidMatchesCount(keypoints_gold, keypoints, matches);
 
-        const cv::KeyPoint& p1 = keypoints_gold[m.queryIdx];
-        const cv::KeyPoint& p2 = keypoints[m.trainIdx];
-
-        const float maxPtDif = 1.f;
-        const float maxSizeDif = 1.f;
-        const float maxAngleDif = 2.f;
-        const float maxResponseDif = 0.1f;
-
-        float dist = (float)cv::norm(p1.pt - p2.pt);
-        if (dist < maxPtDif &&
-            fabs(p1.size - p2.size) < maxSizeDif &&
-            abs(p1.angle - p2.angle) < maxAngleDif &&
-            abs(p1.response - p2.response) < maxResponseDif &&
-            p1.octave == p2.octave &&
-            p1.class_id == p2.class_id)
-        {
-            ++validCount;
-        }
-    }
-
-    double validRatio = (double)validCount / matches.size();
+    double validRatio = (double) validCount / matches.size();
 
     EXPECT_GT(validRatio, 0.5);
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, SURF, testing::ValuesIn(devices(cv::gpu::GLOBAL_ATOMICS)));
+INSTANTIATE_TEST_CASE_P(Features2D, SURF, DEVICES(cv::gpu::GLOBAL_ATOMICS));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // BruteForceMatcher
 
-static const char* dists[] = {"L1Dist", "L2Dist", "HammingDist"};
-
-struct BruteForceMatcher : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, cv::gpu::BruteForceMatcher_GPU_base::DistType, int> >
+PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, DistType, int)
 {
     static const int queryDescCount = 300; // must be even number because we split train data in some cases in two
     static const int countFactor = 4; // do not change it
@@ -163,9 +169,9 @@ struct BruteForceMatcher : testing::TestWithParam< std::tr1::tuple<cv::gpu::Devi
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        distType = std::tr1::get<1>(GetParam());
-        dim = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        distType = (cv::gpu::BruteForceMatcher_GPU_base::DistType)(int)GET_PARAM(1);
+        dim = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
@@ -205,23 +211,14 @@ struct BruteForceMatcher : testing::TestWithParam< std::tr1::tuple<cv::gpu::Devi
     }
 };
 
-const int BruteForceMatcher::queryDescCount;
-const int BruteForceMatcher::countFactor;
-
 TEST_P(BruteForceMatcher, Match)
 {
-    const char* distStr = dists[distType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(distStr);
-    PRINT_PARAM(dim);
-
     std::vector<cv::DMatch> matches;
 
     ASSERT_NO_THROW(
         cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
 
-        matcher.match(cv::gpu::GpuMat(query), cv::gpu::GpuMat(train), matches);
+        matcher.match(loadMat(query), loadMat(train), matches);
     );
 
     ASSERT_EQ(queryDescCount, matches.size());
@@ -239,12 +236,6 @@ TEST_P(BruteForceMatcher, Match)
 
 TEST_P(BruteForceMatcher, MatchAdd)
 {
-    const char* distStr = dists[distType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(distStr);
-    PRINT_PARAM(dim);
-
     std::vector<cv::DMatch> matches;
 
     bool isMaskSupported;
@@ -298,19 +289,13 @@ TEST_P(BruteForceMatcher, MatchAdd)
 
 TEST_P(BruteForceMatcher, KnnMatch2)
 {
-    const char* distStr = dists[distType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(distStr);
-    PRINT_PARAM(dim);
-
     const int knn = 2;
 
     std::vector< std::vector<cv::DMatch> > matches;
 
     ASSERT_NO_THROW(
         cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
-        matcher.knnMatch(cv::gpu::GpuMat(query), cv::gpu::GpuMat(train), matches, knn);
+        matcher.knnMatch(loadMat(query), loadMat(train), matches, knn);
     );
 
     ASSERT_EQ(queryDescCount, matches.size());
@@ -338,19 +323,13 @@ TEST_P(BruteForceMatcher, KnnMatch2)
 
 TEST_P(BruteForceMatcher, KnnMatch3)
 {
-    const char* distStr = dists[distType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(distStr);
-    PRINT_PARAM(dim);
-
     const int knn = 3;
 
     std::vector< std::vector<cv::DMatch> > matches;
 
     ASSERT_NO_THROW(
         cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
-        matcher.knnMatch(cv::gpu::GpuMat(query), cv::gpu::GpuMat(train), matches, knn);
+        matcher.knnMatch(loadMat(query), loadMat(train), matches, knn);
     );
 
     ASSERT_EQ(queryDescCount, matches.size());
@@ -378,12 +357,6 @@ TEST_P(BruteForceMatcher, KnnMatch3)
 
 TEST_P(BruteForceMatcher, KnnMatchAdd2)
 {
-    const char* distStr = dists[distType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(distStr);
-    PRINT_PARAM(dim);
-
     const int knn = 2;
     std::vector< std::vector<cv::DMatch> > matches;
 
@@ -448,12 +421,6 @@ TEST_P(BruteForceMatcher, KnnMatchAdd2)
 
 TEST_P(BruteForceMatcher, KnnMatchAdd3)
 {
-    const char* distStr = dists[distType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(distStr);
-    PRINT_PARAM(dim);
-
     const int knn = 3;
     std::vector< std::vector<cv::DMatch> > matches;
 
@@ -521,12 +488,6 @@ TEST_P(BruteForceMatcher, RadiusMatch)
     if (!supportFeature(devInfo, cv::gpu::SHARED_ATOMICS))
         return;
 
-    const char* distStr = dists[distType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(distStr);
-    PRINT_PARAM(dim);
-
     const float radius = 1.f / countFactor;
 
     std::vector< std::vector<cv::DMatch> > matches;
@@ -534,7 +495,7 @@ TEST_P(BruteForceMatcher, RadiusMatch)
     ASSERT_NO_THROW(
         cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
 
-        matcher.radiusMatch(cv::gpu::GpuMat(query), cv::gpu::GpuMat(train), matches, radius);
+        matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
     );
 
     ASSERT_EQ(queryDescCount, matches.size());
@@ -560,12 +521,6 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
     if (!supportFeature(devInfo, cv::gpu::SHARED_ATOMICS))
         return;
 
-    const char* distStr = dists[distType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(distStr);
-    PRINT_PARAM(dim);
-
     int n = 3;
     const float radius = 1.f / countFactor * n;
 
@@ -631,15 +586,15 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
     ASSERT_EQ(0, badCount);
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(cv::gpu::BruteForceMatcher_GPU_base::L1Dist, cv::gpu::BruteForceMatcher_GPU_base::L2Dist),
-                        testing::Values(57, 64, 83, 128, 179, 256, 304)));
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher, Combine(
+                        ALL_DEVICES,
+                        Values(cv::gpu::BruteForceMatcher_GPU_base::L1Dist, cv::gpu::BruteForceMatcher_GPU_base::L2Dist),
+                        Values(57, 64, 83, 128, 179, 256, 304)));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // FAST
 
-struct FAST : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct FAST : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
 
@@ -659,7 +614,7 @@ struct FAST : testing::TestWithParam<cv::gpu::DeviceInfo>
         ASSERT_FALSE(image.empty());
 
         cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-        threshold = rng.uniform(15, 80);
+        threshold = 30;
 
         cv::FAST(image, keypoints_gold, threshold);
     }
@@ -709,12 +664,12 @@ TEST_P(FAST, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, FAST, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Features2D, FAST, DEVICES(cv::gpu::GLOBAL_ATOMICS));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // ORB
 
-struct ORB : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct ORB : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
 
@@ -738,7 +693,7 @@ struct ORB : testing::TestWithParam<cv::gpu::DeviceInfo>
         mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar::all(1));
         mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
 
-        npoints = 4000;
+        npoints = 1000;
 
         cv::ORB orbCPU(npoints);
 
@@ -746,34 +701,6 @@ struct ORB : testing::TestWithParam<cv::gpu::DeviceInfo>
     }
 };
 
-int getValidMatchesCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
-{
-    int count = 0;
-
-    for (size_t i = 0; i < matches.size(); ++i)
-    {
-        const cv::DMatch& m = matches[i];
-
-        const cv::KeyPoint& kp1 = keypoints1[m.queryIdx];
-        const cv::KeyPoint& kp2 = keypoints2[m.trainIdx];
-
-        bool isEq = 
-            fabs(kp1.pt.x - kp2.pt.x) <= 1 && 
-            fabs(kp1.pt.y - kp2.pt.y) <= 1 && 
-            //fabs(kp1.size - kp2.size) < 1 && 
-            //fabs(kp1.angle - kp2.angle) <= 1 && 
-            //fabs(kp1.response - kp2.response) < 1 &&
-            //kp1.octave == kp2.octave && 
-            //kp1.class_id == kp2.class_id
-            true;
-
-        if (isEq)
-            ++count;
-    }
-
-    return count;
-}
-
 TEST_P(ORB, Accuracy)
 {
     std::vector<cv::KeyPoint> keypoints;
@@ -794,11 +721,11 @@ TEST_P(ORB, Accuracy)
     matcher.match(descriptors_gold, descriptors, matches);
 
     int count = getValidMatchesCount(keypoints_gold, keypoints, matches);
-    double ratio = 100.0 * count / matches.size();
+    double ratio = (double) count / matches.size();
 
-    ASSERT_GE(ratio, 70.0);
+    ASSERT_GE(ratio, 0.65);
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, ORB, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Features2D, ORB, DEVICES(cv::gpu::GLOBAL_ATOMICS));
 
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp
index e9b2ef8d2..382fd1678 100644
--- a/modules/gpu/test/test_filters.cpp
+++ b/modules/gpu/test/test_filters.cpp
@@ -43,6 +43,9 @@
 
 #ifdef HAVE_CUDA
 
+using namespace cvtest;
+using namespace testing;
+
 namespace
 {
     double checkNorm(const cv::Mat& m1, const cv::Mat& m2, const cv::Size& ksize)
@@ -69,10 +72,11 @@ namespace
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // blur
 
-struct Blur : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(Blur, cv::gpu::DeviceInfo, cv::Size, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     cv::Size ksize;
+    bool useRoi;
     
     cv::Mat img_rgba;
     cv::Mat img_gray;
@@ -82,8 +86,9 @@ struct Blur : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        ksize = cv::Size(std::tr1::get<1>(GetParam()), std::tr1::get<2>(GetParam()));
+        devInfo = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
                 
@@ -98,42 +103,51 @@ struct Blur : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
     }
 };
 
-TEST_P(Blur, Accuracy)
+TEST_P(Blur, Rgba)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(ksize);
-
     cv::Mat dst_rgba;
-    cv::Mat dst_gray;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst_rgba;
-        cv::gpu::GpuMat dev_dst_gray;
 
-        cv::gpu::blur(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, ksize);
-        cv::gpu::blur(cv::gpu::GpuMat(img_gray), dev_dst_gray, ksize);
+        cv::gpu::blur(loadMat(img_rgba, useRoi), dev_dst_rgba, ksize);
 
         dev_dst_rgba.download(dst_rgba);
-        dev_dst_gray.download(dst_gray);
     );
 
     EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, ksize, 1.0);
+}
+
+TEST_P(Blur, Gray)
+{
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::blur(loadMat(img_gray, useRoi), dev_dst_gray, ksize);
+
+        dev_dst_gray.download(dst_gray);
+    );
+
     EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, ksize, 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Blur, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(3, 5, 7), 
-                        testing::Values(3, 5, 7)));
+INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
+                        ALL_DEVICES, 
+                        Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
+                        USE_ROI));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // sobel
 
-struct Sobel : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, std::pair<int, int> > >
+PARAM_TEST_CASE(Sobel, cv::gpu::DeviceInfo, int, int, int, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int ksize;
-    int dx, dy;
+    int dx;
+    int dy;
+    bool useRoi;
     
     cv::Mat img_rgba;
     cv::Mat img_gray;
@@ -143,10 +157,14 @@ struct Sobel : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        ksize = std::tr1::get<1>(GetParam());
-        std::pair<int, int> d = std::tr1::get<2>(GetParam());
-        dx = d.first; dy = d.second;
+        devInfo = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        dx = GET_PARAM(2);
+        dy = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        if (dx == 0 && dy == 0)
+            return;
 
         cv::gpu::setDevice(devInfo.deviceID());
         
@@ -161,43 +179,58 @@ struct Sobel : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
     }
 };
 
-TEST_P(Sobel, Accuracy)
+TEST_P(Sobel, Rgba)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(ksize);
-    PRINT_PARAM(dx);
-    PRINT_PARAM(dy);
+    if (dx == 0 && dy == 0)
+        return;
 
     cv::Mat dst_rgba;
-    cv::Mat dst_gray;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst_rgba;
-        cv::gpu::GpuMat dev_dst_gray;
 
-        cv::gpu::Sobel(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, -1, dx, dy, ksize);
-        cv::gpu::Sobel(cv::gpu::GpuMat(img_gray), dev_dst_gray, -1, dx, dy, ksize);
+        cv::gpu::Sobel(loadMat(img_rgba, useRoi), dev_dst_rgba, -1, dx, dy, ksize);
 
         dev_dst_rgba.download(dst_rgba);
-        dev_dst_gray.download(dst_gray);
     );
 
     EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, ksize, 0.0);
+}
+
+TEST_P(Sobel, Gray)
+{
+    if (dx == 0 && dy == 0)
+        return;
+
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::Sobel(loadMat(img_gray, useRoi), dev_dst_gray, -1, dx, dy, ksize);
+
+        dev_dst_gray.download(dst_gray);
+    );
+
     EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, ksize, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Sobel, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(3, 5, 7), 
-                        testing::Values(std::make_pair(1, 0), std::make_pair(0, 1), std::make_pair(1, 1), std::make_pair(2, 0), std::make_pair(2, 1), std::make_pair(0, 2), std::make_pair(1, 2), std::make_pair(2, 2))));
+INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(
+                        ALL_DEVICES, 
+                        Values(3, 5, 7), 
+                        Values(0, 1, 2),
+                        Values(0, 1, 2),
+                        USE_ROI));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // scharr
 
-struct Scharr : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std::pair<int, int> > >
+PARAM_TEST_CASE(Scharr, cv::gpu::DeviceInfo, int, int, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
-    int dx, dy;
+    int dx;
+    int dy;
+    bool useRoi;
     
     cv::Mat img_rgba;
     cv::Mat img_gray;
@@ -207,9 +240,13 @@ struct Scharr : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        std::pair<int, int> d = std::tr1::get<1>(GetParam());
-        dx = d.first; dy = d.second;
+        devInfo = GET_PARAM(0);
+        dx = GET_PARAM(1);
+        dy = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        if (dx + dy != 1)
+            return;
 
         cv::gpu::setDevice(devInfo.deviceID());
         
@@ -224,41 +261,56 @@ struct Scharr : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std
     }
 };
 
-TEST_P(Scharr, Accuracy)
+TEST_P(Scharr, Rgba)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(dx);
-    PRINT_PARAM(dy);
+    if (dx + dy != 1)
+        return;
 
     cv::Mat dst_rgba;
-    cv::Mat dst_gray;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst_rgba;
-        cv::gpu::GpuMat dev_dst_gray;
 
-        cv::gpu::Scharr(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, -1, dx, dy);
-        cv::gpu::Scharr(cv::gpu::GpuMat(img_gray), dev_dst_gray, -1, dx, dy);
+        cv::gpu::Scharr(loadMat(img_rgba, useRoi), dev_dst_rgba, -1, dx, dy);
 
         dev_dst_rgba.download(dst_rgba);
-        dev_dst_gray.download(dst_gray);
     );
 
     EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 3, 0.0);
+}
+
+TEST_P(Scharr, Gray)
+{
+    if (dx + dy != 1)
+        return;
+
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::Scharr(loadMat(img_gray, useRoi), dev_dst_gray, -1, dx, dy);
+
+        dev_dst_gray.download(dst_gray);
+    );
+
     EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 3, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Scharr, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(std::make_pair(1, 0), std::make_pair(0, 1))));
+INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
+                        ALL_DEVICES, 
+                        Values(0, 1),
+                        Values(0, 1),
+                        USE_ROI));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // gaussianBlur
 
-struct GaussianBlur : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     cv::Size ksize;
+    bool useRoi;
     
     cv::Mat img_rgba;
     cv::Mat img_gray;
@@ -270,8 +322,9 @@ struct GaussianBlur : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInf
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        ksize = cv::Size(std::tr1::get<1>(GetParam()), std::tr1::get<2>(GetParam()));
+        devInfo = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
@@ -291,43 +344,49 @@ struct GaussianBlur : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInf
     }
 };
 
-TEST_P(GaussianBlur, Accuracy)
+TEST_P(GaussianBlur, Rgba)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(ksize);
-    PRINT_PARAM(sigma1);
-    PRINT_PARAM(sigma2);
-
     cv::Mat dst_rgba;
-    cv::Mat dst_gray;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst_rgba;
-        cv::gpu::GpuMat dev_dst_gray;
 
-        cv::gpu::GaussianBlur(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, ksize, sigma1, sigma2);
-        cv::gpu::GaussianBlur(cv::gpu::GpuMat(img_gray), dev_dst_gray, ksize, sigma1, sigma2);
+        cv::gpu::GaussianBlur(loadMat(img_rgba, useRoi), dev_dst_rgba, ksize, sigma1, sigma2);
 
         dev_dst_rgba.download(dst_rgba);
-        dev_dst_gray.download(dst_gray);
     );
 
     EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, ksize, 3.0);
+}
+
+TEST_P(GaussianBlur, Gray)
+{
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::GaussianBlur(loadMat(img_gray, useRoi), dev_dst_gray, ksize, sigma1, sigma2);
+
+        dev_dst_gray.download(dst_gray);
+    );
+
     EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, ksize, 3.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(3, 5, 7), 
-                        testing::Values(3, 5, 7)));
+INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
+                        ALL_DEVICES, 
+                        Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
+                        USE_ROI));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // laplacian
 
-struct Laplacian : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, int, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int ksize;
+    bool useRoi;
     
     cv::Mat img_rgba;
     cv::Mat img_gray;
@@ -337,8 +396,9 @@ struct Laplacian : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        ksize = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
@@ -353,39 +413,48 @@ struct Laplacian : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
     }
 };
 
-TEST_P(Laplacian, Accuracy)
+TEST_P(Laplacian, Rgba)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(ksize);
-
     cv::Mat dst_rgba;
-    cv::Mat dst_gray;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst_rgba;
-        cv::gpu::GpuMat dev_dst_gray;
 
-        cv::gpu::Laplacian(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, -1, ksize);
-        cv::gpu::Laplacian(cv::gpu::GpuMat(img_gray), dev_dst_gray, -1, ksize);
+        cv::gpu::Laplacian(loadMat(img_rgba, useRoi), dev_dst_rgba, -1, ksize);
 
         dev_dst_rgba.download(dst_rgba);
-        dev_dst_gray.download(dst_gray);
     );
 
     EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 3, 0.0);
+}
+
+TEST_P(Laplacian, Gray)
+{
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::Laplacian(loadMat(img_gray, useRoi), dev_dst_gray, -1, ksize);
+
+        dev_dst_gray.download(dst_gray);
+    );
+
     EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 3, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Laplacian, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(1, 3)));
+INSTANTIATE_TEST_CASE_P(Filter, Laplacian, Combine(
+                        ALL_DEVICES,
+                        Values(1, 3),
+                        USE_ROI));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // erode
 
-struct Erode : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(Erode, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
     
     cv::Mat img_rgba;
     cv::Mat img_gray;
@@ -397,7 +466,8 @@ struct Erode : testing::TestWithParam<cv::gpu::DeviceInfo>
     
     virtual void SetUp()
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
@@ -414,36 +484,47 @@ struct Erode : testing::TestWithParam<cv::gpu::DeviceInfo>
     }
 };
 
-TEST_P(Erode, Accuracy)
+TEST_P(Erode, Rgba)
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat dst_rgba;
-    cv::Mat dst_gray;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst_rgba;
-        cv::gpu::GpuMat dev_dst_gray;
 
-        cv::gpu::erode(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, kernel);
-        cv::gpu::erode(cv::gpu::GpuMat(img_gray), dev_dst_gray, kernel);
+        cv::gpu::erode(loadMat(img_rgba, useRoi), dev_dst_rgba, kernel);
 
         dev_dst_rgba.download(dst_rgba);
-        dev_dst_gray.download(dst_gray);
     );
 
     EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 3, 0.0);
+}
+
+TEST_P(Erode, Gray)
+{
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::erode(loadMat(img_gray, useRoi), dev_dst_gray, kernel);
+
+        dev_dst_gray.download(dst_gray);
+    );
+
     EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 3, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Erode, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // dilate
 
-struct Dilate : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(Dilate, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
     
     cv::Mat img_rgba;
     cv::Mat img_gray;
@@ -455,7 +536,8 @@ struct Dilate : testing::TestWithParam<cv::gpu::DeviceInfo>
     
     virtual void SetUp()
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
@@ -472,40 +554,48 @@ struct Dilate : testing::TestWithParam<cv::gpu::DeviceInfo>
     }
 };
 
-TEST_P(Dilate, Accuracy)
+TEST_P(Dilate, Rgba)
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat dst_rgba;
-    cv::Mat dst_gray;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst_rgba;
-        cv::gpu::GpuMat dev_dst_gray;
 
-        cv::gpu::dilate(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, kernel);
-        cv::gpu::dilate(cv::gpu::GpuMat(img_gray), dev_dst_gray, kernel);
+        cv::gpu::dilate(loadMat(img_rgba, useRoi), dev_dst_rgba, kernel);
 
         dev_dst_rgba.download(dst_rgba);
-        dev_dst_gray.download(dst_gray);
     );
 
     EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 3, 0.0);
+}
+
+TEST_P(Dilate, Gray)
+{
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::dilate(loadMat(img_gray, useRoi), dev_dst_gray, kernel);
+
+        dev_dst_gray.download(dst_gray);
+    );
+
     EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 3, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Dilate, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
+                        ALL_DEVICES,
+                        USE_ROI));
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // morphEx
 
-static const int morphOps[] = {cv::MORPH_OPEN, CV_MOP_CLOSE, CV_MOP_GRADIENT, CV_MOP_TOPHAT, CV_MOP_BLACKHAT};
-static const char* morphOps_str[] = {"MORPH_OPEN", "MOP_CLOSE", "MOP_GRADIENT", "MOP_TOPHAT", "MOP_BLACKHAT"};
-
-struct MorphEx : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(MorphEx, cv::gpu::DeviceInfo, MorphOp, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
-    int morphOpsIdx;
+    int morphOp;
+    bool useRoi;
     
     cv::Mat img_rgba;
     cv::Mat img_gray;
@@ -517,8 +607,9 @@ struct MorphEx : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, in
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        morphOpsIdx = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        morphOp = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
@@ -530,38 +621,44 @@ struct MorphEx : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, in
 
         kernel = cv::Mat::ones(3, 3, CV_8U);
         
-        cv::morphologyEx(img_rgba, dst_gold_rgba, morphOps[morphOpsIdx], kernel);
-        cv::morphologyEx(img_gray, dst_gold_gray, morphOps[morphOpsIdx], kernel);
+        cv::morphologyEx(img_rgba, dst_gold_rgba, morphOp, kernel);
+        cv::morphologyEx(img_gray, dst_gold_gray, morphOp, kernel);
     }
 };
 
-TEST_P(MorphEx, Accuracy)
+TEST_P(MorphEx, Rgba)
 {
-    const char* morphOpStr = morphOps_str[morphOpsIdx];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(morphOpStr);
-
     cv::Mat dst_rgba;
-    cv::Mat dst_gray;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst_rgba;
-        cv::gpu::GpuMat dev_dst_gray;
 
-        cv::gpu::morphologyEx(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, morphOps[morphOpsIdx], kernel);
-        cv::gpu::morphologyEx(cv::gpu::GpuMat(img_gray), dev_dst_gray, morphOps[morphOpsIdx], kernel);
+        cv::gpu::morphologyEx(loadMat(img_rgba, useRoi), dev_dst_rgba, morphOp, kernel);
 
         dev_dst_rgba.download(dst_rgba);
-        dev_dst_gray.download(dst_gray);
     );
 
     EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 4, 0.0);
+}
+
+TEST_P(MorphEx, Gray)
+{
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::morphologyEx(loadMat(img_gray, useRoi), dev_dst_gray, morphOp, kernel);
+
+        dev_dst_gray.download(dst_gray);
+    );
+
     EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 4, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, MorphEx, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Range(0, 5)));
+INSTANTIATE_TEST_CASE_P(Filter, MorphEx, Combine(
+                        ALL_DEVICES,
+                        Values((int)cv::MORPH_OPEN, (int)cv::MORPH_CLOSE, (int)cv::MORPH_GRADIENT, (int)cv::MORPH_TOPHAT, (int)cv::MORPH_BLACKHAT),
+                        USE_ROI));
 
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_gpu_base.cpp b/modules/gpu/test/test_gpu_base.cpp
index f035d7a3c..2c34a472e 100644
--- a/modules/gpu/test/test_gpu_base.cpp
+++ b/modules/gpu/test/test_gpu_base.cpp
@@ -41,25 +41,53 @@
 
 #include "test_precomp.hpp"
 
-bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature)
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+using namespace cvtest;
+
+GpuMat loadMat(const Mat& m, bool useRoi)
 {
-    return cv::gpu::TargetArchs::builtWith(feature) && info.supports(feature);
+    Size size = m.size();
+    Size size0 = size;
+
+    if (useRoi)
+    {
+        RNG& rng = TS::ptr()->get_rng();
+
+        size0.width += rng.uniform(5, 15);
+        size0.height += rng.uniform(5, 15);
+    }
+        
+    GpuMat d_m(size0, m.type());
+    
+    if (size0 != size)
+        d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height));
+
+    d_m.upload(m);
+
+    return d_m;
 }
 
-const std::vector<cv::gpu::DeviceInfo>& devices()
+bool supportFeature(const DeviceInfo& info, FeatureSet feature)
 {
-    static std::vector<cv::gpu::DeviceInfo> devs;
+    return TargetArchs::builtWith(feature) && info.supports(feature);
+}
+
+const vector<DeviceInfo>& devices()
+{
+    static vector<DeviceInfo> devs;
     static bool first = true;
 
     if (first)
     {
-        int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
+        int deviceCount = getCudaEnabledDeviceCount();
 
         devs.reserve(deviceCount);
 
         for (int i = 0; i < deviceCount; ++i)
         {
-            cv::gpu::DeviceInfo info(i);
+            DeviceInfo info(i);
             if (info.isCompatible())
                 devs.push_back(info);
         }
@@ -70,19 +98,19 @@ const std::vector<cv::gpu::DeviceInfo>& devices()
     return devs;
 }
 
-std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature)
+vector<DeviceInfo> devices(FeatureSet feature)
 {
-    const std::vector<cv::gpu::DeviceInfo>& d = devices();
+    const vector<DeviceInfo>& d = devices();
     
-    std::vector<cv::gpu::DeviceInfo> devs_filtered;
+    vector<DeviceInfo> devs_filtered;
 
-    if (cv::gpu::TargetArchs::builtWith(feature))
+    if (TargetArchs::builtWith(feature))
     {
         devs_filtered.reserve(d.size());
 
         for (size_t i = 0, size = d.size(); i < size; ++i)
         {
-            const cv::gpu::DeviceInfo& info = d[i];
+            const DeviceInfo& info = d[i];
 
             if (info.supports(feature))
                 devs_filtered.push_back(info);
@@ -92,9 +120,9 @@ std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature)
     return devs_filtered;
 }
 
-std::vector<int> types(int depth_start, int depth_end, int cn_start, int cn_end)
+vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
 {
-    std::vector<int> v;
+    vector<MatType> v;
 
     v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1));
 
@@ -109,46 +137,39 @@ std::vector<int> types(int depth_start, int depth_end, int cn_start, int cn_end)
     return v;
 }
 
-const std::vector<int>& all_types()
+const vector<MatType>& all_types()
 {
-    static std::vector<int> v = types(CV_8U, CV_64F, 1, 4);
+    static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
+
     return v;
 }
 
-cv::Mat readImage(const std::string& fileName, int flags)
+Mat readImage(const string& fileName, int flags)
 {
-    return cv::imread(std::string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
+    return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
 }
 
-double checkNorm(const cv::Mat& m1, const cv::Mat& m2)
+double checkNorm(const Mat& m1, const Mat& m2)
 {
-    return cv::norm(m1, m2, cv::NORM_INF);
+    return norm(m1, m2, NORM_INF);
 }
 
-double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2)
+double checkSimilarity(const Mat& m1, const Mat& m2)
 {
-    cv::Mat diff;
-    cv::matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
+    Mat diff;
+    matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
     return std::abs(diff.at<float>(0, 0) - 1.f);
 }
 
-namespace cv
+void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os)
 {
-    std::ostream& operator << (std::ostream& os, const Size& sz)
-    {
-        return os << sz.width << "x" << sz.height;
-    }
-
-    std::ostream& operator << (std::ostream& os, const Scalar& s)
-    {
-        return os << "[" << s[0] << ", " << s[1] << ", " << s[2] << ", " << s[3] << "]";
-    }
-
-    namespace gpu
-    {
-        std::ostream& operator << (std::ostream& os, const DeviceInfo& info)
-        {
-            return os << info.name();
-        }
-    }
+    (*os) << info.name();
+}
+
+void PrintTo(const UseRoi& useRoi, std::ostream* os)
+{
+    if (useRoi)
+        (*os) << "sub matrix";
+    else
+        (*os) << "whole matrix";
 }
diff --git a/modules/gpu/test/test_gpu_base.hpp b/modules/gpu/test/test_gpu_base.hpp
index 25bf163a3..3004164e0 100644
--- a/modules/gpu/test/test_gpu_base.hpp
+++ b/modules/gpu/test/test_gpu_base.hpp
@@ -42,6 +42,8 @@
 #ifndef __OPENCV_TEST_GPU_BASE_HPP__
 #define __OPENCV_TEST_GPU_BASE_HPP__
 
+cv::gpu::GpuMat loadMat(const cv::Mat& m, bool useRoi = false);
+
 //! return true if device supports specified feature and gpu module was built with support the feature.
 bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
 
@@ -50,30 +52,12 @@ const std::vector<cv::gpu::DeviceInfo>& devices();
 //! return all devices compatible with current gpu module build which support specified feature.
 std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
 
-//! return vector with types from specified range.
-std::vector<int> types(int depth_start, int depth_end, int cn_start, int cn_end);
-
-//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
-const std::vector<int>& all_types();
-
 //! read image from testdata folder.
-cv::Mat readImage(const std::string& fileName, int flags = CV_LOAD_IMAGE_COLOR);
+cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
 
 double checkNorm(const cv::Mat& m1, const cv::Mat& m2);
 double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2);
 
-#define OSTR_NAME(suf) ostr_ ## suf
-
-#define PRINT_PARAM(name) \
-        std::ostringstream OSTR_NAME(name); \
-        OSTR_NAME(name) << # name << ": " << name; \
-        SCOPED_TRACE(OSTR_NAME(name).str());
-
-#define PRINT_TYPE(type) \
-        std::ostringstream OSTR_NAME(type); \
-        OSTR_NAME(type) << # type << ": " << cvtest::getTypeName(type) << "c" << CV_MAT_CN(type); \
-        SCOPED_TRACE(OSTR_NAME(type).str());
-
 #define EXPECT_MAT_NEAR(mat1, mat2, eps) \
     { \
         ASSERT_EQ(mat1.type(), mat2.type()); \
@@ -88,16 +72,66 @@ double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2);
         EXPECT_LE(checkSimilarity(mat1, mat2), eps); \
     }
 
-
-//! for gtest ASSERT
-namespace cv
+namespace cv { namespace gpu 
 {
-    std::ostream& operator << (std::ostream& os, const Size& sz);
-    std::ostream& operator << (std::ostream& os, const Scalar& s);
-    namespace gpu
-    {
-        std::ostream& operator << (std::ostream& os, const DeviceInfo& info);
-    }
-}
+    void PrintTo(const DeviceInfo& info, std::ostream* os);
+}}
+
+using perf::MatDepth;
+using perf::MatType;
+
+//! return vector with types from specified range.
+std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
+
+//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
+const std::vector<MatType>& all_types();
+
+class UseRoi
+{
+public:
+    inline UseRoi(bool val = false) : val_(val) {}
+
+    inline operator bool() const { return val_; }
+
+private:
+    bool val_;
+};
+
+void PrintTo(const UseRoi& useRoi, std::ostream* os);
+
+CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
+
+CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
+
+enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
+CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
+
+CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+
+CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T)
+
+CV_ENUM(DistType, cv::gpu::BruteForceMatcher_GPU_base::L1Dist, cv::gpu::BruteForceMatcher_GPU_base::L2Dist)
+
+CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+
+CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
+
+CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
+
+CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
+
+CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
+
+CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
+#define GET_PARAM(k) std::tr1::get< k >(GetParam())
+#define ALL_DEVICES testing::ValuesIn(devices())
+#define DEVICES(feature) testing::ValuesIn(devices(feature))
+#define ALL_TYPES testing::ValuesIn(all_types())
+#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
+#define USE_ROI testing::Values(false, true)
 
 #endif // __OPENCV_TEST_GPU_BASE_HPP__
diff --git a/modules/gpu/test/test_hog.cpp b/modules/gpu/test/test_hog.cpp
index c8afa058f..54ad6bb59 100644
--- a/modules/gpu/test/test_hog.cpp
+++ b/modules/gpu/test/test_hog.cpp
@@ -43,6 +43,9 @@
 
 #ifdef HAVE_CUDA
 
+using namespace cvtest;
+using namespace testing;
+
 //#define DUMP
 
 struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
@@ -169,7 +172,7 @@ struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
 #endif
 };
 
-struct HogDetect : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct Detect : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
     
@@ -181,17 +184,15 @@ struct HogDetect : testing::TestWithParam<cv::gpu::DeviceInfo>
     }
 };
 
-TEST_P(HogDetect, Accuracy)
+TEST_P(Detect, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-
     ASSERT_NO_THROW(
         CV_GpuHogDetectTestRunner runner;
         runner.run();
     );
 }
 
-INSTANTIATE_TEST_CASE_P(HOG, HogDetect, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(HOG, Detect, ALL_DEVICES);
 
 struct CV_GpuHogGetDescriptorsTestRunner : cv::gpu::HOGDescriptor
 {
@@ -301,7 +302,7 @@ struct CV_GpuHogGetDescriptorsTestRunner : cv::gpu::HOGDescriptor
     int block_hist_size;
 };
 
-struct HogGetDescriptors : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct GetDescriptors : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
     
@@ -313,16 +314,14 @@ struct HogGetDescriptors : testing::TestWithParam<cv::gpu::DeviceInfo>
     }
 };
 
-TEST_P(HogGetDescriptors, Accuracy)
+TEST_P(GetDescriptors, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-
     ASSERT_NO_THROW(
         CV_GpuHogGetDescriptorsTestRunner runner;
         runner.run();
     );
 }
 
-INSTANTIATE_TEST_CASE_P(HOG, HogGetDescriptors, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(HOG, GetDescriptors, ALL_DEVICES);
 
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
index 85c569317..e78a69ac2 100644
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -43,14 +43,18 @@
 
 #ifdef HAVE_CUDA
 
+using namespace cvtest;
+using namespace testing;
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // threshold
 
-struct Threshold : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, MatType, ThreshOp, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int threshOp;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
@@ -61,17 +65,18 @@ struct Threshold : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        threshOp = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        threshOp = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        src = randomMat(rng, size, type, 0.0, 127.0, false);
 
         maxVal = rng.uniform(20.0, 127.0);
         thresh = rng.uniform(0.0, maxVal);
@@ -82,22 +87,12 @@ struct Threshold : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
 
 TEST_P(Threshold, Accuracy)
 {
-    static const char* ops[] = {"THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", "THRESH_TOZERO", "THRESH_TOZERO_INV"};
-    const char* threshOpStr = ops[threshOp];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    PRINT_PARAM(threshOpStr);
-    PRINT_PARAM(maxVal);
-    PRINT_PARAM(thresh);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::threshold(cv::gpu::GpuMat(src), gpuRes, thresh, maxVal, threshOp);
+        cv::gpu::threshold(loadMat(src, useRoi), gpuRes, thresh, maxVal, threshOp);
 
         gpuRes.download(dst);
     );
@@ -105,88 +100,94 @@ TEST_P(Threshold, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8U, CV_32F), 
-                        testing::Values((int)cv::THRESH_BINARY, (int)cv::THRESH_BINARY_INV, (int)cv::THRESH_TRUNC, (int)cv::THRESH_TOZERO, (int)cv::THRESH_TOZERO_INV)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, Combine(
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_32FC1), 
+                        Values((int)cv::THRESH_BINARY, (int)cv::THRESH_BINARY_INV, (int)cv::THRESH_TRUNC, (int)cv::THRESH_TOZERO, (int)cv::THRESH_TOZERO_INV),
+                        USE_ROI));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // resize
 
-struct Resize : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(Resize, cv::gpu::DeviceInfo, MatType, Interpolation, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int interpolation;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
 
-    cv::Mat dst_gold1;
-    cv::Mat dst_gold2;
+    cv::Mat dst_gold_up;
+    cv::Mat dst_gold_down;
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        interpolation = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        interpolation = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, CV_MAT_DEPTH(type) == CV_32F ? 1.0 : 255.0, false);
+        src = randomMat(rng, size, type, 0.0, CV_MAT_DEPTH(type) == CV_32F ? 1.0 : 255.0, false);
 
-        cv::resize(src, dst_gold1, cv::Size(), 2.0, 2.0, interpolation);
-        cv::resize(src, dst_gold2, cv::Size(), 0.5, 0.5, interpolation);
+        cv::resize(src, dst_gold_up, cv::Size(), 2.0, 2.0, interpolation);
+        cv::resize(src, dst_gold_down, cv::Size(), 0.5, 0.5, interpolation);
     }
 };
 
-TEST_P(Resize, Accuracy)
+TEST_P(Resize, Up)
 {
-    static const char* interpolations[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC"};
-    const char* interpolationStr = interpolations[interpolation];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    PRINT_PARAM(interpolationStr);
-
-    cv::Mat dst1;
-    cv::Mat dst2;
+    cv::Mat dst;
 
     ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_src(src);
-        cv::gpu::GpuMat gpuRes1;
-        cv::gpu::GpuMat gpuRes2;
+        cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::resize(dev_src, gpuRes1, cv::Size(), 2.0, 2.0, interpolation);
-        cv::gpu::resize(dev_src, gpuRes2, cv::Size(), 0.5, 0.5, interpolation);
+        cv::gpu::resize(loadMat(src, useRoi), gpuRes, cv::Size(), 2.0, 2.0, interpolation);
 
-        gpuRes1.download(dst1);
-        gpuRes2.download(dst2);
+        gpuRes.download(dst);
     );
 
-    EXPECT_MAT_SIMILAR(dst_gold1, dst1, 0.21);
-    EXPECT_MAT_SIMILAR(dst_gold2, dst2, 0.21);
+    EXPECT_MAT_SIMILAR(dst_gold_up, dst, 0.21);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), 
-                        testing::Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR, (int)cv::INTER_CUBIC)));
+TEST_P(Resize, Down) // GPU resize with scale 0.5 on both axes, checked against dst_gold_down computed by cv::resize in SetUp
+{
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::resize(loadMat(src, useRoi), gpuRes, cv::Size(), 0.5, 0.5, interpolation); // empty dsize: output size is derived from the two 0.5 scale factors
+
+        gpuRes.download(dst); // bring the device result back to the host for comparison
+    );
+
+    EXPECT_MAT_SIMILAR(dst_gold_down, dst, 0.22); // note: the Up test uses 0.21 for the same check
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Resize, Combine( // Combine yields every combination of the four generators below
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), // source matrix types
+                        Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR, (int)cv::INTER_CUBIC), // interpolation modes
+                        USE_ROI)); // with and without the useRoi flag passed to loadMat
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // remap
 
-struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int, int> >
+PARAM_TEST_CASE(Remap, cv::gpu::DeviceInfo, MatType, Interpolation, Border, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int interpolation;
     int borderType;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
@@ -197,21 +198,22 @@ struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        interpolation = std::tr1::get<2>(GetParam());
-        borderType = std::tr1::get<3>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        interpolation = GET_PARAM(2);
+        borderType = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 256.0, false);
+        src = randomMat(rng, size, type, 0.0, 256.0, false);
 
-        xmap = cvtest::randomMat(rng, size, CV_32FC1, -20.0, src.cols + 20, false);
-        ymap = cvtest::randomMat(rng, size, CV_32FC1, -20.0, src.rows + 20, false);
+        xmap = randomMat(rng, size, CV_32FC1, -20.0, src.cols + 20, false);
+        ymap = randomMat(rng, size, CV_32FC1, -20.0, src.rows + 20, false);
         
         cv::remap(src, dst_gold, xmap, ymap, interpolation, borderType);
     }
@@ -219,24 +221,12 @@ struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
 TEST_P(Remap, Accuracy)
 {
-    static const char* interpolations_str[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC"};
-    static const char* borderTypes_str[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
-
-    const char* interpolationStr = interpolations_str[interpolation];
-    const char* borderTypeStr = borderTypes_str[borderType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(interpolationStr);
-    PRINT_PARAM(borderTypeStr);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
         
-        cv::gpu::remap(cv::gpu::GpuMat(src), gpuRes, cv::gpu::GpuMat(xmap), cv::gpu::GpuMat(ymap), interpolation, borderType);
+        cv::gpu::remap(loadMat(src, useRoi), gpuRes, loadMat(xmap, useRoi), loadMat(ymap, useRoi), interpolation, borderType);
 
         gpuRes.download(dst);
     );
@@ -244,29 +234,22 @@ TEST_P(Remap, Accuracy)
     EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-1);
 }
 
-INSTANTIATE_TEST_CASE_P
-(
-    ImgProc, Remap, testing::Combine
-    (
-        testing::ValuesIn(devices()), 
-        testing::Values
-        (
-            CV_8UC1, CV_8UC3, CV_8UC4,
-            CV_32FC1, CV_32FC3, CV_32FC4
-        ),
-        testing::Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR, (int)cv::INTER_CUBIC),
-        testing::Values((int)cv::BORDER_REFLECT101, (int)cv::BORDER_REPLICATE, (int)cv::BORDER_CONSTANT, (int)cv::BORDER_REFLECT, (int)cv::BORDER_WRAP)
-    )
-);
+INSTANTIATE_TEST_CASE_P(ImgProc, Remap, Combine(
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                        Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR, (int)cv::INTER_CUBIC),
+                        Values((int)cv::BORDER_REFLECT101, (int)cv::BORDER_REPLICATE, (int)cv::BORDER_CONSTANT, (int)cv::BORDER_REFLECT, (int)cv::BORDER_WRAP),
+                        USE_ROI));
                         
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // copyMakeBorder
 
-struct CopyMakeBorder : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(CopyMakeBorder, cv::gpu::DeviceInfo, MatType, Border, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
     int borderType;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
@@ -280,17 +263,18 @@ struct CopyMakeBorder : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceI
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        borderType = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        borderType = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        src = randomMat(rng, size, type, 0.0, 127.0, false);
         
         top = rng.uniform(1, 10);
         botton = rng.uniform(1, 10);
@@ -304,26 +288,12 @@ struct CopyMakeBorder : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceI
 
 TEST_P(CopyMakeBorder, Accuracy)
 {
-    static const char* borderTypes_str[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
-
-    const char* borderTypeStr = borderTypes_str[borderType];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    PRINT_PARAM(top);
-    PRINT_PARAM(botton);
-    PRINT_PARAM(left);
-    PRINT_PARAM(right);
-    PRINT_PARAM(borderTypeStr);
-    PRINT_PARAM(val);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::copyMakeBorder(cv::gpu::GpuMat(src), gpuRes, top, botton, left, right, borderType, val);
+        cv::gpu::copyMakeBorder(loadMat(src, useRoi), gpuRes, top, botton, left, right, borderType, val);
 
         gpuRes.download(dst);
     );
@@ -331,22 +301,20 @@ TEST_P(CopyMakeBorder, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC3, CV_16SC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int)cv::BORDER_REFLECT101, (int)cv::BORDER_REPLICATE, (int)cv::BORDER_CONSTANT, (int)cv::BORDER_REFLECT, (int)cv::BORDER_WRAP)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, Combine(
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC3, CV_16SC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                        Values((int)cv::BORDER_REFLECT101, (int)cv::BORDER_REPLICATE, (int)cv::BORDER_CONSTANT, (int)cv::BORDER_REFLECT, (int)cv::BORDER_WRAP),
+                        USE_ROI));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // warpAffine & warpPerspective
 
-static const int warpFlags[] = {cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_NEAREST | cv::WARP_INVERSE_MAP, cv::INTER_LINEAR | cv::WARP_INVERSE_MAP, cv::INTER_CUBIC | cv::WARP_INVERSE_MAP};
-static const char* warpFlags_str[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_NEAREST | WARP_INVERSE_MAP", "INTER_LINEAR | WARP_INVERSE_MAP", "INTER_CUBIC | WARP_INVERSE_MAP"};
-
-struct WarpAffine : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(WarpAffine, cv::gpu::DeviceInfo, MatType, WarpFlags)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
-    int flagIdx;
+    int flags;
 
     cv::Size size;
     cv::Mat src;
@@ -356,17 +324,17 @@ struct WarpAffine : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        flagIdx = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        flags = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        src = randomMat(rng, size, type, 0.0, 127.0, false);
 
         static double reflect[2][3] = { {-1,  0, 0},
                                         { 0, -1, 0}};
@@ -374,25 +342,18 @@ struct WarpAffine : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
         reflect[1][2] = size.height;
         M = cv::Mat(2, 3, CV_64F, (void*)reflect); 
 
-        cv::warpAffine(src, dst_gold, M, src.size(), warpFlags[flagIdx]);       
+        cv::warpAffine(src, dst_gold, M, src.size(), flags);       
     }
 };
 
 TEST_P(WarpAffine, Accuracy)
 {
-    const char* warpFlagStr = warpFlags_str[flagIdx];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    PRINT_PARAM(warpFlagStr);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::warpAffine(cv::gpu::GpuMat(src), gpuRes, M, src.size(), warpFlags[flagIdx]);
+        cv::gpu::warpAffine(loadMat(src), gpuRes, M, src.size(), flags);
 
         gpuRes.download(dst);
     );
@@ -404,11 +365,18 @@ TEST_P(WarpAffine, Accuracy)
     EXPECT_MAT_NEAR(dst_gold_roi, dst_roi, 1e-3);
 }
 
-struct WarpPerspective : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, Combine( // every device x type x flags combination
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                        Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC, // plain interpolation modes
+                               (int) (cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), (int) (cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), // same modes OR-ed with WARP_INVERSE_MAP
+                               (int) (cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
+
+PARAM_TEST_CASE(WarpPerspective, cv::gpu::DeviceInfo, MatType, WarpFlags)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
-    int flagIdx;
+    int flags;
 
     cv::Size size;
     cv::Mat src;
@@ -418,17 +386,17 @@ struct WarpPerspective : testing::TestWithParam< std::tr1::tuple<cv::gpu::Device
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        flagIdx = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        flags = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        src = randomMat(rng, size, type, 0.0, 127.0, false);
 
         static double reflect[3][3] = { { -1, 0, 0},
                                         { 0, -1, 0},
@@ -437,25 +405,18 @@ struct WarpPerspective : testing::TestWithParam< std::tr1::tuple<cv::gpu::Device
         reflect[1][2] = size.height;
         M = cv::Mat(3, 3, CV_64F, (void*)reflect);
 
-        cv::warpPerspective(src, dst_gold, M, src.size(), warpFlags[flagIdx]);       
+        cv::warpPerspective(src, dst_gold, M, src.size(), flags);       
     }
 };
 
 TEST_P(WarpPerspective, Accuracy)
 {
-    const char* warpFlagStr = warpFlags_str[flagIdx];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    PRINT_PARAM(warpFlagStr);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::warpPerspective(cv::gpu::GpuMat(src), gpuRes, M, src.size(), warpFlags[flagIdx]);
+        cv::gpu::warpPerspective(loadMat(src), gpuRes, M, src.size(), flags);
 
         gpuRes.download(dst);
     );
@@ -467,22 +428,20 @@ TEST_P(WarpPerspective, Accuracy)
     EXPECT_MAT_NEAR(dst_gold_roi, dst_roi, 1e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Range(0, 6)));
-
-INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Range(0, 6)));
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, Combine( // every device x type x flags combination
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                        Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC, // plain interpolation modes
+                               (int) (cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), (int) (cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), // same modes OR-ed with WARP_INVERSE_MAP
+                               (int) (cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // integral
 
-struct Integral : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
@@ -491,15 +450,16 @@ struct Integral : testing::TestWithParam<cv::gpu::DeviceInfo>
     
     virtual void SetUp()
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
-        src = cvtest::randomMat(rng, size, CV_8UC1, 0.0, 255.0, false); 
+        src = randomMat(rng, size, CV_8UC1, 0.0, 255.0, false); 
 
         cv::integral(src, dst_gold, CV_32S);     
     }
@@ -507,15 +467,12 @@ struct Integral : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(Integral, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::integral(cv::gpu::GpuMat(src), gpuRes);
+        cv::gpu::integral(loadMat(src, useRoi), gpuRes);
 
         gpuRes.download(dst);
     );
@@ -523,22 +480,26 @@ TEST_P(Integral, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(ImgProc, Integral, Combine(
+                        ALL_DEVICES, 
+                        USE_ROI));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cvtColor
 
-struct CvtColor : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(CvtColor, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
     
     cv::Mat img;
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
@@ -551,19 +512,16 @@ struct CvtColor : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, i
 
 TEST_P(CvtColor, BGR2RGB)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2RGB);
 
         gpuRes.download(dst);
     );
@@ -573,19 +531,16 @@ TEST_P(CvtColor, BGR2RGB)
 
 TEST_P(CvtColor, BGR2RGBA)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2RGBA);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2RGBA);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2RGBA);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2RGBA);
 
         gpuRes.download(dst);
     );
@@ -595,19 +550,16 @@ TEST_P(CvtColor, BGR2RGBA)
 
 TEST_P(CvtColor, BGR2BGRA)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2BGRA);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2BGRA);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2BGRA);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2BGRA);
 
         gpuRes.download(dst);
     );
@@ -617,20 +569,17 @@ TEST_P(CvtColor, BGR2BGRA)
 
 TEST_P(CvtColor, BGRA2RGB)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGRA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGRA2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGRA2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGRA2RGB);
 
         gpuRes.download(dst);
     );
@@ -640,20 +589,17 @@ TEST_P(CvtColor, BGRA2RGB)
 
 TEST_P(CvtColor, BGRA2BGR)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGRA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGRA2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGRA2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGRA2BGR);
 
         gpuRes.download(dst);
     );
@@ -663,20 +609,17 @@ TEST_P(CvtColor, BGRA2BGR)
 
 TEST_P(CvtColor, BGRA2RGBA)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGRA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGRA2RGBA);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2RGBA);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGRA2RGBA);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGRA2RGBA);
 
         gpuRes.download(dst);
     );
@@ -686,19 +629,16 @@ TEST_P(CvtColor, BGRA2RGBA)
 
 TEST_P(CvtColor, BGR2GRAY)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2GRAY);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2GRAY);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2GRAY);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2GRAY);
 
         gpuRes.download(dst);
     );
@@ -708,20 +648,17 @@ TEST_P(CvtColor, BGR2GRAY)
 
 TEST_P(CvtColor, RGB2GRAY)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2GRAY);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2GRAY);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2GRAY);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2GRAY);
 
         gpuRes.download(dst);
     );
@@ -731,20 +668,17 @@ TEST_P(CvtColor, RGB2GRAY)
 
 TEST_P(CvtColor, GRAY2BGR)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2GRAY);
+    cv::cvtColor(img, src, cv::COLOR_BGR2GRAY);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_GRAY2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_GRAY2BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_GRAY2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_GRAY2BGR);
 
         gpuRes.download(dst);
     );
@@ -754,20 +688,17 @@ TEST_P(CvtColor, GRAY2BGR)
 
 TEST_P(CvtColor, GRAY2BGRA)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2GRAY);
+    cv::cvtColor(img, src, cv::COLOR_BGR2GRAY);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_GRAY2BGRA, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_GRAY2BGRA, 4);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_GRAY2BGRA, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_GRAY2BGRA, 4);
 
         gpuRes.download(dst);
     );
@@ -777,20 +708,17 @@ TEST_P(CvtColor, GRAY2BGRA)
 
 TEST_P(CvtColor, BGRA2GRAY)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGRA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGRA2GRAY);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2GRAY);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGRA2GRAY);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGRA2GRAY);
 
         gpuRes.download(dst);
     );
@@ -800,20 +728,17 @@ TEST_P(CvtColor, BGRA2GRAY)
 
 TEST_P(CvtColor, RGBA2GRAY)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGBA2GRAY);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGBA2GRAY);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGBA2GRAY);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGBA2GRAY);
 
         gpuRes.download(dst);
     );
@@ -826,19 +751,16 @@ TEST_P(CvtColor, BGR2BGR565)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2BGR565);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2BGR565);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2BGR565);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2BGR565);
 
         gpuRes.download(dst);
     );
@@ -851,20 +773,17 @@ TEST_P(CvtColor, RGB2BGR565)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2BGR565);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2BGR565);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2BGR565);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2BGR565);
 
         gpuRes.download(dst);
     );
@@ -877,20 +796,17 @@ TEST_P(CvtColor, BGR5652BGR)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR565);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5652BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5652BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5652BGR);
 
         gpuRes.download(dst);
     );
@@ -903,20 +819,17 @@ TEST_P(CvtColor, BGR5652RGB)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR565);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5652RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5652RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5652RGB);
 
         gpuRes.download(dst);
     );
@@ -929,20 +842,17 @@ TEST_P(CvtColor, BGRA2BGR565)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGRA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGRA2BGR565);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2BGR565);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGRA2BGR565);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGRA2BGR565);
 
         gpuRes.download(dst);
     );
@@ -955,20 +865,17 @@ TEST_P(CvtColor, RGBA2BGR565)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGBA2BGR565);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGBA2BGR565);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGBA2BGR565);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGBA2BGR565);
 
         gpuRes.download(dst);
     );
@@ -981,20 +888,17 @@ TEST_P(CvtColor, BGR5652BGRA)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR565);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5652BGRA, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652BGRA, 4);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5652BGRA, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5652BGRA, 4);
 
         gpuRes.download(dst);
     );
@@ -1007,20 +911,17 @@ TEST_P(CvtColor, BGR5652RGBA)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR565);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5652RGBA, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652RGBA, 4);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5652RGBA, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5652RGBA, 4);
 
         gpuRes.download(dst);
     );
@@ -1033,20 +934,17 @@ TEST_P(CvtColor, GRAY2BGR565)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2GRAY);
+    cv::cvtColor(img, src, cv::COLOR_BGR2GRAY);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_GRAY2BGR565);
+    cv::cvtColor(src, dst_gold, cv::COLOR_GRAY2BGR565);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_GRAY2BGR565);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_GRAY2BGR565);
 
         gpuRes.download(dst);
     );
@@ -1059,20 +957,17 @@ TEST_P(CvtColor, BGR5652GRAY)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR565);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5652GRAY);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652GRAY);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5652GRAY);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5652GRAY);
 
         gpuRes.download(dst);
     );
@@ -1085,19 +980,16 @@ TEST_P(CvtColor, BGR2BGR555)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2BGR555);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2BGR555);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2BGR555);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2BGR555);
 
         gpuRes.download(dst);
     );
@@ -1110,20 +1002,17 @@ TEST_P(CvtColor, RGB2BGR555)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2BGR555);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2BGR555);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2BGR555);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2BGR555);
 
         gpuRes.download(dst);
     );
@@ -1136,20 +1025,17 @@ TEST_P(CvtColor, BGR5552BGR)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR555);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5552BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5552BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5552BGR);
 
         gpuRes.download(dst);
     );
@@ -1162,20 +1048,17 @@ TEST_P(CvtColor, BGR5552RGB)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR555);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5552RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5552RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5552RGB);
 
         gpuRes.download(dst);
     );
@@ -1188,20 +1071,17 @@ TEST_P(CvtColor, BGRA2BGR555)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGRA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGRA2BGR555);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2BGR555);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGRA2BGR555);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGRA2BGR555);
 
         gpuRes.download(dst);
     );
@@ -1214,20 +1094,17 @@ TEST_P(CvtColor, RGBA2BGR555)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGBA2BGR555);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGBA2BGR555);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGBA2BGR555);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGBA2BGR555);
 
         gpuRes.download(dst);
     );
@@ -1240,20 +1117,17 @@ TEST_P(CvtColor, BGR5552BGRA)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR555);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5552BGRA, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552BGRA, 4);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5552BGRA, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5552BGRA, 4);
 
         gpuRes.download(dst);
     );
@@ -1266,20 +1140,17 @@ TEST_P(CvtColor, BGR5552RGBA)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR555);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5552RGBA, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552RGBA, 4);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5552RGBA, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5552RGBA, 4);
 
         gpuRes.download(dst);
     );
@@ -1292,20 +1163,17 @@ TEST_P(CvtColor, GRAY2BGR555)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2GRAY);
+    cv::cvtColor(img, src, cv::COLOR_BGR2GRAY);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_GRAY2BGR555);
+    cv::cvtColor(src, dst_gold, cv::COLOR_GRAY2BGR555);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_GRAY2BGR555);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_GRAY2BGR555);
 
         gpuRes.download(dst);
     );
@@ -1318,20 +1186,17 @@ TEST_P(CvtColor, BGR5552GRAY)
     if (type != CV_8U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGR555);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR5552GRAY);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552GRAY);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR5552GRAY);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR5552GRAY);
 
         gpuRes.download(dst);
     );
@@ -1341,19 +1206,16 @@ TEST_P(CvtColor, BGR5552GRAY)
 
 TEST_P(CvtColor, BGR2XYZ)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2XYZ);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2XYZ);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2XYZ);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2XYZ);
 
         gpuRes.download(dst);
     );
@@ -1363,20 +1225,17 @@ TEST_P(CvtColor, BGR2XYZ)
 
 TEST_P(CvtColor, RGB2XYZ)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB); // build an RGB-ordered input for the RGB2XYZ path
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2XYZ);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2XYZ); // src is RGB-ordered: use the RGB code so this test does not duplicate BGR2XYZ
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2XYZ);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2XYZ);
 
         gpuRes.download(dst);
     );
@@ -1386,19 +1245,16 @@ TEST_P(CvtColor, RGB2XYZ)
 
 TEST_P(CvtColor, BGR2XYZ4)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2XYZ);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2XYZ);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2XYZ, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2XYZ, 4);
 
         gpuRes.download(dst);
     );
@@ -1414,20 +1270,17 @@ TEST_P(CvtColor, BGR2XYZ4)
 
 TEST_P(CvtColor, BGRA2XYZ4)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2BGRA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2XYZ);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2XYZ);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2XYZ, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2XYZ, 4);
 
         gpuRes.download(dst);
     );
@@ -1443,20 +1296,17 @@ TEST_P(CvtColor, BGRA2XYZ4)
 
 TEST_P(CvtColor, XYZ2BGR)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2XYZ);
+    cv::cvtColor(img, src, cv::COLOR_BGR2XYZ);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_XYZ2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_XYZ2BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_XYZ2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_XYZ2BGR);
 
         gpuRes.download(dst);
     );
@@ -1466,20 +1316,17 @@ TEST_P(CvtColor, XYZ2BGR)
 
 TEST_P(CvtColor, XYZ2RGB)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2XYZ);
+    cv::cvtColor(img, src, cv::COLOR_BGR2XYZ);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_XYZ2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_XYZ2RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_XYZ2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_XYZ2RGB);
 
         gpuRes.download(dst);
     );
@@ -1489,13 +1336,10 @@ TEST_P(CvtColor, XYZ2RGB)
 
 TEST_P(CvtColor, XYZ42BGR)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2XYZ);
+    cv::cvtColor(img, src, cv::COLOR_BGR2XYZ);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_XYZ2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_XYZ2BGR);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -1507,7 +1351,7 @@ TEST_P(CvtColor, XYZ42BGR)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_XYZ2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_XYZ2BGR);
 
         gpuRes.download(dst);
     );
@@ -1517,13 +1361,10 @@ TEST_P(CvtColor, XYZ42BGR)
 
 TEST_P(CvtColor, XYZ42BGRA)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2XYZ);
+    cv::cvtColor(img, src, cv::COLOR_BGR2XYZ);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_XYZ2BGR, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_XYZ2BGR, 4);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -1535,7 +1376,7 @@ TEST_P(CvtColor, XYZ42BGRA)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_XYZ2BGR, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_XYZ2BGR, 4);
 
         gpuRes.download(dst);
     );
@@ -1545,19 +1386,16 @@ TEST_P(CvtColor, XYZ42BGRA)
 
 TEST_P(CvtColor, BGR2YCrCb)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2YCrCb);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2YCrCb);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2YCrCb);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2YCrCb);
 
         gpuRes.download(dst);
     );
@@ -1567,20 +1405,17 @@ TEST_P(CvtColor, BGR2YCrCb)
 
 TEST_P(CvtColor, RGB2YCrCb)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2YCrCb);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2YCrCb);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2YCrCb);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2YCrCb);
 
         gpuRes.download(dst);
     );
@@ -1590,19 +1425,16 @@ TEST_P(CvtColor, RGB2YCrCb)
 
 TEST_P(CvtColor, BGR2YCrCb4)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2YCrCb);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2YCrCb);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2YCrCb, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2YCrCb, 4);
 
         gpuRes.download(dst);
     );
@@ -1618,20 +1450,17 @@ TEST_P(CvtColor, BGR2YCrCb4)
 
 TEST_P(CvtColor, RGBA2YCrCb4)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA); // build an RGBA-ordered, 4-channel input
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2YCrCb);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2YCrCb); // src is RGBA-ordered: use the RGB code so the RGB(A)->YCrCb path is what gets tested
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2YCrCb, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2YCrCb, 4);
 
         gpuRes.download(dst);
     );
@@ -1647,20 +1476,17 @@ TEST_P(CvtColor, RGBA2YCrCb4)
 
 TEST_P(CvtColor, YCrCb2BGR)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2YCrCb);
+    cv::cvtColor(img, src, cv::COLOR_BGR2YCrCb);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_YCrCb2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_YCrCb2BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YCrCb2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_YCrCb2BGR);
 
         gpuRes.download(dst);
     );
@@ -1670,20 +1496,17 @@ TEST_P(CvtColor, YCrCb2BGR)
 
 TEST_P(CvtColor, YCrCb2RGB)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2YCrCb);
+    cv::cvtColor(img, src, cv::COLOR_BGR2YCrCb);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_YCrCb2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_YCrCb2RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YCrCb2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_YCrCb2RGB);
 
         gpuRes.download(dst);
     );
@@ -1693,13 +1516,10 @@ TEST_P(CvtColor, YCrCb2RGB)
 
 TEST_P(CvtColor, YCrCb42RGB)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2YCrCb);
+    cv::cvtColor(img, src, cv::COLOR_BGR2YCrCb);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_YCrCb2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_YCrCb2RGB);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -1711,7 +1531,7 @@ TEST_P(CvtColor, YCrCb42RGB)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YCrCb2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_YCrCb2RGB);
 
         gpuRes.download(dst);
     );
@@ -1721,13 +1541,10 @@ TEST_P(CvtColor, YCrCb42RGB)
 
 TEST_P(CvtColor, YCrCb42RGBA)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2YCrCb);
+    cv::cvtColor(img, src, cv::COLOR_BGR2YCrCb);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_YCrCb2RGB, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_YCrCb2RGB, 4);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -1739,7 +1556,7 @@ TEST_P(CvtColor, YCrCb42RGBA)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YCrCb2RGB, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_YCrCb2RGB, 4);
 
         gpuRes.download(dst);
     );
@@ -1752,19 +1569,16 @@ TEST_P(CvtColor, BGR2HSV)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2HSV);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2HSV);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2HSV);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2HSV);
 
         gpuRes.download(dst);
     );
@@ -1777,20 +1591,17 @@ TEST_P(CvtColor, RGB2HSV)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HSV);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HSV);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HSV);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HSV);
 
         gpuRes.download(dst);
     );
@@ -1803,20 +1614,17 @@ TEST_P(CvtColor, RGB2HSV4)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HSV);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HSV);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HSV, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HSV, 4);
 
         gpuRes.download(dst);
     );
@@ -1835,20 +1643,17 @@ TEST_P(CvtColor, RGBA2HSV4)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HSV);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HSV);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HSV, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HSV, 4);
 
         gpuRes.download(dst);
     );
@@ -1867,19 +1672,16 @@ TEST_P(CvtColor, BGR2HLS)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2HLS);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2HLS);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2HLS);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2HLS);
 
         gpuRes.download(dst);
     );
@@ -1892,20 +1694,17 @@ TEST_P(CvtColor, RGB2HLS)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HLS);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HLS);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HLS);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HLS);
 
         gpuRes.download(dst);
     );
@@ -1918,20 +1717,17 @@ TEST_P(CvtColor, RGB2HLS4)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HLS);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HLS);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HLS, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HLS, 4);
 
         gpuRes.download(dst);
     );
@@ -1950,20 +1746,17 @@ TEST_P(CvtColor, RGBA2HLS4)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HLS);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HLS);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HLS, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HLS, 4);
 
         gpuRes.download(dst);
     );
@@ -1982,20 +1775,17 @@ TEST_P(CvtColor, HSV2BGR)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HSV);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HSV);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HSV2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HSV2BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HSV2BGR);
 
         gpuRes.download(dst);
     );
@@ -2008,20 +1798,17 @@ TEST_P(CvtColor, HSV2RGB)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HSV);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HSV);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HSV2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HSV2RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HSV2RGB);
 
         gpuRes.download(dst);
     );
@@ -2034,13 +1821,10 @@ TEST_P(CvtColor, HSV42BGR)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HSV);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HSV);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HSV2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HSV2BGR);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2052,7 +1836,7 @@ TEST_P(CvtColor, HSV42BGR)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HSV2BGR);
 
         gpuRes.download(dst);
     );
@@ -2065,13 +1849,10 @@ TEST_P(CvtColor, HSV42BGRA)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HSV);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HSV);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HSV2BGR, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HSV2BGR, 4);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2083,7 +1864,7 @@ TEST_P(CvtColor, HSV42BGRA)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2BGR, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HSV2BGR, 4);
 
         gpuRes.download(dst);
     );
@@ -2096,20 +1877,17 @@ TEST_P(CvtColor, HLS2BGR)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HLS);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HLS);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HLS2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HLS2BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HLS2BGR);
 
         gpuRes.download(dst);
     );
@@ -2122,20 +1900,17 @@ TEST_P(CvtColor, HLS2RGB)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HLS);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HLS);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HLS2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HLS2RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HLS2RGB);
 
         gpuRes.download(dst);
     );
@@ -2148,13 +1923,10 @@ TEST_P(CvtColor, HLS42RGB)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HLS);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HLS);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HLS2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HLS2RGB);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2166,7 +1938,7 @@ TEST_P(CvtColor, HLS42RGB)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HLS2RGB);
 
         gpuRes.download(dst);
     );
@@ -2179,13 +1951,10 @@ TEST_P(CvtColor, HLS42RGBA)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HLS);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HLS);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HLS2RGB, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HLS2RGB, 4);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2197,7 +1966,7 @@ TEST_P(CvtColor, HLS42RGBA)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2RGB, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HLS2RGB, 4);
 
         gpuRes.download(dst);
     );
@@ -2210,19 +1979,16 @@ TEST_P(CvtColor, BGR2HSV_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2HSV_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2HSV_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2HSV_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2HSV_FULL);
 
         gpuRes.download(dst);
     );
@@ -2235,20 +2001,17 @@ TEST_P(CvtColor, RGB2HSV_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HSV_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HSV_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HSV_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HSV_FULL);
 
         gpuRes.download(dst);
     );
@@ -2261,20 +2024,17 @@ TEST_P(CvtColor, RGB2HSV4_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HSV_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HSV_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HSV_FULL, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HSV_FULL, 4);
 
         gpuRes.download(dst);
     );
@@ -2293,20 +2053,17 @@ TEST_P(CvtColor, RGBA2HSV4_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HSV_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HSV_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HSV_FULL, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HSV_FULL, 4);
 
         gpuRes.download(dst);
     );
@@ -2325,19 +2082,16 @@ TEST_P(CvtColor, BGR2HLS_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2HLS_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2HLS_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2HLS_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2HLS_FULL);
 
         gpuRes.download(dst);
     );
@@ -2350,20 +2104,17 @@ TEST_P(CvtColor, RGB2HLS_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HLS_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HLS_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HLS_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HLS_FULL);
 
         gpuRes.download(dst);
     );
@@ -2376,20 +2127,17 @@ TEST_P(CvtColor, RGB2HLS4_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HLS_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HLS_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HLS_FULL, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HLS_FULL, 4);
 
         gpuRes.download(dst);
     );
@@ -2408,20 +2156,17 @@ TEST_P(CvtColor, RGBA2HLS4_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2HLS_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HLS_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2HLS_FULL, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2HLS_FULL, 4);
 
         gpuRes.download(dst);
     );
@@ -2440,20 +2185,17 @@ TEST_P(CvtColor, HSV2BGR_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HSV_FULL);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HSV_FULL);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HSV2BGR_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HSV2BGR_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2BGR_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HSV2BGR_FULL);
 
         gpuRes.download(dst);
     );
@@ -2466,20 +2208,17 @@ TEST_P(CvtColor, HSV2RGB_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HSV_FULL);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HSV_FULL);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HSV2RGB_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HSV2RGB_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2RGB_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HSV2RGB_FULL);
 
         gpuRes.download(dst);
     );
@@ -2492,13 +2231,10 @@ TEST_P(CvtColor, HSV42RGB_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HSV_FULL);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HSV_FULL);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HSV2RGB_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HSV2RGB_FULL);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2510,7 +2246,7 @@ TEST_P(CvtColor, HSV42RGB_FULL)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2RGB_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HSV2RGB_FULL);
 
         gpuRes.download(dst);
     );
@@ -2523,13 +2259,10 @@ TEST_P(CvtColor, HSV42RGBA_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HSV_FULL);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HSV_FULL);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HSV2RGB_FULL, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HSV2RGB_FULL, 4);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2541,7 +2274,7 @@ TEST_P(CvtColor, HSV42RGBA_FULL)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2RGB_FULL, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HSV2RGB_FULL, 4);
 
         gpuRes.download(dst);
     );
@@ -2554,20 +2287,17 @@ TEST_P(CvtColor, HLS2BGR_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HLS_FULL);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HLS_FULL);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HLS2BGR_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HLS2BGR_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2BGR_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HLS2BGR_FULL);
 
         gpuRes.download(dst);
     );
@@ -2580,20 +2310,17 @@ TEST_P(CvtColor, HLS2RGB_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HLS_FULL);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HLS_FULL);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HLS2RGB_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HLS2RGB_FULL);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2RGB_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HLS2RGB_FULL);
 
         gpuRes.download(dst);
     );
@@ -2606,13 +2333,10 @@ TEST_P(CvtColor, HLS42RGB_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HLS_FULL);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HLS_FULL);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HLS2RGB_FULL);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HLS2RGB_FULL);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2624,7 +2348,7 @@ TEST_P(CvtColor, HLS42RGB_FULL)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2RGB_FULL);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HLS2RGB_FULL);
 
         gpuRes.download(dst);
     );
@@ -2637,13 +2361,10 @@ TEST_P(CvtColor, HLS42RGBA_FULL)
     if (type == CV_16U)
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2HLS_FULL);
+    cv::cvtColor(img, src, cv::COLOR_BGR2HLS_FULL);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_HLS2RGB_FULL, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_HLS2RGB_FULL, 4);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2655,7 +2376,7 @@ TEST_P(CvtColor, HLS42RGBA_FULL)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2RGB_FULL, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_HLS2RGB_FULL, 4);
 
         gpuRes.download(dst);
     );
@@ -2665,19 +2386,16 @@ TEST_P(CvtColor, HLS42RGBA_FULL)
 
 TEST_P(CvtColor, BGR2YUV)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2YUV);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2YUV);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2YUV);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2YUV);
 
         gpuRes.download(dst);
     );
@@ -2687,20 +2405,17 @@ TEST_P(CvtColor, BGR2YUV)
 
 TEST_P(CvtColor, RGB2YUV)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGB);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGB);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2YUV);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2YUV);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2YUV);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2YUV);
 
         gpuRes.download(dst);
     );
@@ -2710,20 +2425,17 @@ TEST_P(CvtColor, RGB2YUV)
 
 TEST_P(CvtColor, YUV2BGR)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2YUV);
+    cv::cvtColor(img, src, cv::COLOR_BGR2YUV);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_YUV2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_YUV2BGR);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YUV2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_YUV2BGR);
 
         gpuRes.download(dst);
     );
@@ -2733,13 +2445,10 @@ TEST_P(CvtColor, YUV2BGR)
 
 TEST_P(CvtColor, YUV42BGR)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2YUV);
+    cv::cvtColor(img, src, cv::COLOR_BGR2YUV);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_YUV2BGR);
+    cv::cvtColor(src, dst_gold, cv::COLOR_YUV2BGR);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2751,7 +2460,7 @@ TEST_P(CvtColor, YUV42BGR)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YUV2BGR);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_YUV2BGR);
 
         gpuRes.download(dst);
     );
@@ -2761,13 +2470,10 @@ TEST_P(CvtColor, YUV42BGR)
 
 TEST_P(CvtColor, YUV42BGRA)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2YUV);
+    cv::cvtColor(img, src, cv::COLOR_BGR2YUV);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_YUV2BGR, 4);
+    cv::cvtColor(src, dst_gold, cv::COLOR_YUV2BGR, 4);
 
     cv::Mat channels[4];
     cv::split(src, channels);
@@ -2779,7 +2485,7 @@ TEST_P(CvtColor, YUV42BGRA)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YUV2BGR, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_YUV2BGR, 4);
 
         gpuRes.download(dst);
     );
@@ -2789,20 +2495,17 @@ TEST_P(CvtColor, YUV42BGRA)
 
 TEST_P(CvtColor, YUV2RGB)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_RGB2YUV);
+    cv::cvtColor(img, src, cv::COLOR_RGB2YUV);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_YUV2RGB);
+    cv::cvtColor(src, dst_gold, cv::COLOR_YUV2RGB);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YUV2RGB);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_YUV2RGB);
 
         gpuRes.download(dst);
     );
@@ -2812,19 +2515,16 @@ TEST_P(CvtColor, YUV2RGB)
 
 TEST_P(CvtColor, BGR2YUV4)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src = img;
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_BGR2YUV);
+    cv::cvtColor(src, dst_gold, cv::COLOR_BGR2YUV);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2YUV, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_BGR2YUV, 4);
 
         gpuRes.download(dst);
     );
@@ -2840,20 +2540,17 @@ TEST_P(CvtColor, BGR2YUV4)
 
 TEST_P(CvtColor, RGBA2YUV4)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-
     cv::Mat src;
-    cv::cvtColor(img, src, CV_BGR2RGBA);
+    cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
     cv::Mat dst_gold;
-    cv::cvtColor(src, dst_gold, CV_RGB2YUV);
+    cv::cvtColor(src, dst_gold, cv::COLOR_RGB2YUV);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuRes;
 
-        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_RGB2YUV, 4);
+        cv::gpu::cvtColor(loadMat(src, useRoi), gpuRes, cv::COLOR_RGB2YUV, 4);
 
         gpuRes.download(dst);
     );
@@ -2867,14 +2564,15 @@ TEST_P(CvtColor, RGBA2YUV4)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8U, CV_16U, CV_32F)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, Combine(
+                        ALL_DEVICES, 
+                        Values(CV_8U, CV_16U, CV_32F),
+                        USE_ROI));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // histograms
 
-struct HistEven : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct HistEven : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
     
@@ -2917,15 +2615,11 @@ struct HistEven : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(HistEven, Accuracy)
 {
-    ASSERT_TRUE(!hsv.empty());
-
-    PRINT_PARAM(devInfo);
-
     cv::Mat hist;
     
     ASSERT_NO_THROW(
         std::vector<cv::gpu::GpuMat> srcs;
-        cv::gpu::split(cv::gpu::GpuMat(hsv), srcs);
+        cv::gpu::split(loadMat(hsv), srcs);
 
         cv::gpu::GpuMat gpuHist;
 
@@ -2937,9 +2631,9 @@ TEST_P(HistEven, Accuracy)
     EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, HistEven, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(ImgProc, HistEven, ALL_DEVICES);
 
-struct CalcHist : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct CalcHist : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
 
@@ -2953,11 +2647,11 @@ struct CalcHist : testing::TestWithParam<cv::gpu::DeviceInfo>
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
         
-        src = cvtest::randomMat(rng, size, CV_8UC1, 0, 255, false);
+        src = randomMat(rng, size, CV_8UC1, 0, 255, false);
 
         hist_gold.create(1, 256, CV_32SC1);
         hist_gold.setTo(cv::Scalar::all(0));
@@ -2975,15 +2669,12 @@ struct CalcHist : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(CalcHist, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat hist;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuHist;
 
-        cv::gpu::calcHist(cv::gpu::GpuMat(src), gpuHist);
+        cv::gpu::calcHist(loadMat(src), gpuHist);
 
         gpuHist.download(hist);
     );
@@ -2991,9 +2682,9 @@ TEST_P(CalcHist, Accuracy)
     EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CalcHist, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(ImgProc, CalcHist, ALL_DEVICES);
 
-struct EqualizeHist : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct EqualizeHist : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
 
@@ -3007,11 +2698,11 @@ struct EqualizeHist : testing::TestWithParam<cv::gpu::DeviceInfo>
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
         
-        src = cvtest::randomMat(rng, size, CV_8UC1, 0, 255, false);
+        src = randomMat(rng, size, CV_8UC1, 0, 255, false);
 
         cv::equalizeHist(src, dst_gold);
     }
@@ -3019,15 +2710,12 @@ struct EqualizeHist : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(EqualizeHist, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpuDst;
 
-        cv::gpu::equalizeHist(cv::gpu::GpuMat(src), gpuDst);
+        cv::gpu::equalizeHist(loadMat(src), gpuDst);
 
         gpuDst.download(dst);
     );
@@ -3035,19 +2723,16 @@ TEST_P(EqualizeHist, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 3.0);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, ALL_DEVICES);
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cornerHarris
 
-static const int borderTypes[] = {cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT101, cv::BORDER_TRANSPARENT};
-static const char* borderTypes_str[] = {"BORDER_REPLICATE", "BORDER_CONSTANT", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT101", "BORDER_TRANSPARENT"};
-
-struct CornerHarris : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, Border)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
-    int borderTypeIdx;
+    int borderType;
 
     cv::Mat src;
     int blockSize;
@@ -3058,13 +2743,13 @@ struct CornerHarris : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInf
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        borderTypeIdx = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        borderType = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
     
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
         
         cv::Mat img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
         ASSERT_FALSE(img.empty());
@@ -3075,44 +2760,36 @@ struct CornerHarris : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInf
         apertureSize = 1 + 2 * (rng.next() % 4);        
         k = rng.uniform(0.1, 0.9);
 
-        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderTypes[borderTypeIdx]);
+        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
     }
 };
 
 TEST_P(CornerHarris, Accuracy)
 {
-    const char* borderTypeStr = borderTypes_str[borderTypeIdx];
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(borderTypeStr);
-    PRINT_PARAM(blockSize);
-    PRINT_PARAM(apertureSize);
-    PRINT_PARAM(k);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dev_dst, blockSize, apertureSize, k, borderTypes[borderTypeIdx]);
+        cv::gpu::cornerHarris(loadMat(src), dev_dst, blockSize, apertureSize, k, borderType);
         dev_dst.download(dst);
     );
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_32FC1), 
-                        testing::Values(0, 4)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, Combine(
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_32FC1), 
+                        Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE)));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cornerMinEigen
 
-struct CornerMinEigen : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, Border)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
-    int borderTypeIdx;
+    int borderType;
 
     cv::Mat src;
     int blockSize;
@@ -3122,13 +2799,13 @@ struct CornerMinEigen : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceI
     
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        borderTypeIdx = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        borderType = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());        
     
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
         
         cv::Mat img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
         ASSERT_FALSE(img.empty());
@@ -3138,39 +2815,32 @@ struct CornerMinEigen : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceI
         blockSize = 1 + rng.next() % 5;
         apertureSize = 1 + 2 * (rng.next() % 4);
 
-        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderTypes[borderTypeIdx]);
+        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
     }
 };
 
 TEST_P(CornerMinEigen, Accuracy)
 {
-    const char* borderTypeStr = borderTypes_str[borderTypeIdx];
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(borderTypeStr);
-    PRINT_PARAM(blockSize);
-    PRINT_PARAM(apertureSize);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dev_dst, blockSize, apertureSize, borderTypes[borderTypeIdx]);
+        cv::gpu::cornerMinEigenVal(loadMat(src), dev_dst, blockSize, apertureSize, borderType);
         dev_dst.download(dst);
     );
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-2);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigen, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_32FC1), 
-                        testing::Values(0, 4)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigen, Combine(
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_32FC1), 
+                        Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE)));
 
 ////////////////////////////////////////////////////////////////////////
 // ColumnSum
 
-struct ColumnSum : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct ColumnSum : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
 
@@ -3183,24 +2853,21 @@ struct ColumnSum : testing::TestWithParam<cv::gpu::DeviceInfo>
 
         cv::gpu::setDevice(devInfo.deviceID());
     
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 400), rng.uniform(100, 400));
 
-        src = cvtest::randomMat(rng, size, CV_32F, 0.0, 1.0, false);
+        src = randomMat(rng, size, CV_32F, 0.0, 1.0, false);
     }
 };
 
 TEST_P(ColumnSum, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
     
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::columnSum(cv::gpu::GpuMat(src), dev_dst);
+        cv::gpu::columnSum(loadMat(src), dev_dst);
         dev_dst.download(dst);
     );
 
@@ -3222,19 +2889,17 @@ TEST_P(ColumnSum, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, ColumnSum, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(ImgProc, ColumnSum, ALL_DEVICES);
 
 ////////////////////////////////////////////////////////////////////////
 // Norm
 
-static const int normTypes[] = {cv::NORM_INF, cv::NORM_L1, cv::NORM_L2};
-static const char* normTypes_str[] = {"NORM_INF", "NORM_L1", "NORM_L2"};
-
-struct Norm : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(Norm, cv::gpu::DeviceInfo, MatType, NormCode, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
-    int normTypeIdx;
+    int normType;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
@@ -3243,51 +2908,47 @@ struct Norm : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
-        normTypeIdx = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        normType = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
     
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 400), rng.uniform(100, 400));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 10.0, false);
+        src = randomMat(rng, size, type, 0.0, 10.0, false);
 
-        gold = cv::norm(src, normTypes[normTypeIdx]);
+        gold = cv::norm(src, normType);
     }
 };
 
 TEST_P(Norm, Accuracy)
 {
-    const char* normTypeStr = normTypes_str[normTypeIdx];
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    PRINT_PARAM(normTypeStr);
-
     double res;
 
     ASSERT_NO_THROW(
-        res = cv::gpu::norm(cv::gpu::GpuMat(src), normTypes[normTypeIdx]);
+        res = cv::gpu::norm(loadMat(src, useRoi), normType); // normType is the norm code itself, not an index into a table; loadMat(src, useRoi) presumably uploads src as a sub-matrix when useRoi is true -- confirm in the test base helpers
     );
 
     ASSERT_NEAR(res, gold, 0.5);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Norm, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::ValuesIn(types(CV_8U, CV_32F, 1, 1)),
-                        testing::Range(0, 3)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Norm, Combine(
+                        ALL_DEVICES, 
+                        TYPES(CV_8U, CV_32F, 1, 1), // replaces testing::ValuesIn(types(CV_8U, CV_32F, 1, 1)); TYPES is defined outside this hunk
+                        Values((int) cv::NORM_INF, (int) cv::NORM_L1, (int) cv::NORM_L2), // norm codes passed directly instead of Range(0, 3) indices into normTypes[]
+                        USE_ROI)); // new fourth parameter, read as useRoi in SetUp
 
 ////////////////////////////////////////////////////////////////////////////////
 // reprojectImageTo3D
 
-struct ReprojectImageTo3D : testing::TestWithParam<cv::gpu::DeviceInfo>
+PARAM_TEST_CASE(ReprojectImageTo3D, cv::gpu::DeviceInfo, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat disp;
@@ -3297,17 +2958,18 @@ struct ReprojectImageTo3D : testing::TestWithParam<cv::gpu::DeviceInfo>
 
     virtual void SetUp()
     {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
     
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 500), rng.uniform(100, 500));
 
-        disp = cvtest::randomMat(rng, size, CV_8UC1, 5.0, 30.0, false);
+        disp = randomMat(rng, size, CV_8UC1, 5.0, 30.0, false);
 
-        Q = cvtest::randomMat(rng, cv::Size(4, 4), CV_32FC1, 0.1, 1.0, false);
+        Q = randomMat(rng, cv::Size(4, 4), CV_32FC1, 0.1, 1.0, false);
 
         cv::reprojectImageTo3D(disp, dst_gold, Q, false);
     }
@@ -3315,14 +2977,11 @@ struct ReprojectImageTo3D : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(ReprojectImageTo3D, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat gpures;
-        cv::gpu::reprojectImageTo3D(cv::gpu::GpuMat(disp), gpures, Q);
+        cv::gpu::reprojectImageTo3D(loadMat(disp, useRoi), gpures, Q);
         gpures.download(dst);
     );
 
@@ -3345,12 +3004,12 @@ TEST_P(ReprojectImageTo3D, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, Combine(ALL_DEVICES, USE_ROI)); // now a (device, useRoi) product; previously parameterised on devices only
 
 ////////////////////////////////////////////////////////////////////////////////
 // meanShift
 
-struct MeanShift : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct MeanShift : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
     
@@ -3386,13 +3045,11 @@ TEST_P(MeanShift, Filtering)
 
     ASSERT_FALSE(img_template.empty());
 
-    PRINT_PARAM(devInfo);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::meanShiftFiltering(cv::gpu::GpuMat(rgba), dev_dst, spatialRad, colorRad);
+        cv::gpu::meanShiftFiltering(loadMat(rgba), dev_dst, spatialRad, colorRad);
         dev_dst.download(dst);
     );
 
@@ -3420,19 +3077,17 @@ TEST_P(MeanShift, Proc)
 
     ASSERT_TRUE(!rgba.empty() && !spmap_template.empty());
 
-    PRINT_PARAM(devInfo);
-
     cv::Mat rmap_filtered;
     cv::Mat rmap;
     cv::Mat spmap;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat d_rmap_filtered;
-        cv::gpu::meanShiftFiltering(cv::gpu::GpuMat(rgba), d_rmap_filtered, spatialRad, colorRad);
+        cv::gpu::meanShiftFiltering(loadMat(rgba), d_rmap_filtered, spatialRad, colorRad);
 
         cv::gpu::GpuMat d_rmap;
         cv::gpu::GpuMat d_spmap;
-        cv::gpu::meanShiftProc(cv::gpu::GpuMat(rgba), d_rmap, d_spmap, spatialRad, colorRad);
+        cv::gpu::meanShiftProc(loadMat(rgba), d_rmap, d_spmap, spatialRad, colorRad);
 
         d_rmap_filtered.download(rmap_filtered);
         d_rmap.download(rmap);
@@ -3445,9 +3100,9 @@ TEST_P(MeanShift, Proc)
     EXPECT_MAT_NEAR(spmap_template, spmap, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MeanShift, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShift, ALL_DEVICES); // still a single device parameter; ALL_DEVICES replaces testing::ValuesIn(devices()) -- confirm equivalence against its definition
 
-struct MeanShiftSegmentation : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(MeanShiftSegmentation, cv::gpu::DeviceInfo, int)
 {
     cv::gpu::DeviceInfo devInfo;
     int minsize;
@@ -3458,8 +3113,8 @@ struct MeanShiftSegmentation : testing::TestWithParam< std::tr1::tuple<cv::gpu::
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        minsize = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        minsize = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
@@ -3482,13 +3137,10 @@ struct MeanShiftSegmentation : testing::TestWithParam< std::tr1::tuple<cv::gpu::
 
 TEST_P(MeanShiftSegmentation, Regression)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(minsize);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
-        cv::gpu::meanShiftSegmentation(cv::gpu::GpuMat(rgba), dst, 10, 10, minsize);
+        cv::gpu::meanShiftSegmentation(loadMat(rgba), dst, 10, 10, minsize);
     );
 
     cv::Mat dst_rgb;
@@ -3497,16 +3149,14 @@ TEST_P(MeanShiftSegmentation, Regression)
     EXPECT_MAT_SIMILAR(dst_gold, dst_rgb, 1e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftSegmentation, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(0, 4, 20, 84, 340, 1364)));
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftSegmentation, Combine(
+                        ALL_DEVICES,
+                        Values(0, 4, 20, 84, 340, 1364))); // minsize values (second fixture parameter); the list itself is unchanged by this patch
 
 ////////////////////////////////////////////////////////////////////////////////
 // matchTemplate
 
-static const char* matchTemplateMethods[] = {"SQDIFF", "SQDIFF_NORMED", "CCORR", "CCORR_NORMED", "CCOEFF", "CCOEFF_NORMED"};
-
-struct MatchTemplate8U : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(MatchTemplate8U, cv::gpu::DeviceInfo, int, TemplateMethod)
 {
     cv::gpu::DeviceInfo devInfo;
     int cn;
@@ -3519,21 +3169,21 @@ struct MatchTemplate8U : testing::TestWithParam< std::tr1::tuple<cv::gpu::Device
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        cn = std::tr1::get<1>(GetParam());
-        method = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        cn = GET_PARAM(1);
+        method = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         n = rng.uniform(30, 100);
         m = rng.uniform(30, 100);
         h = rng.uniform(5, n - 1);
         w = rng.uniform(5, m - 1);
 
-        image = cvtest::randomMat(rng, cv::Size(m, n), CV_MAKETYPE(CV_8U, cn), 1.0, 10.0, false);
-        templ = cvtest::randomMat(rng, cv::Size(w, h), CV_MAKETYPE(CV_8U, cn), 1.0, 10.0, false);
+        image = randomMat(rng, cv::Size(m, n), CV_MAKETYPE(CV_8U, cn), 1.0, 10.0, false);
+        templ = randomMat(rng, cv::Size(w, h), CV_MAKETYPE(CV_8U, cn), 1.0, 10.0, false);
 
         cv::matchTemplate(image, templ, dst_gold, method);
     }
@@ -3541,32 +3191,24 @@ struct MatchTemplate8U : testing::TestWithParam< std::tr1::tuple<cv::gpu::Device
 
 TEST_P(MatchTemplate8U, Regression)
 {
-    const char* matchTemplateMethodStr = matchTemplateMethods[method];
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(cn);
-    PRINT_PARAM(matchTemplateMethodStr);
-    PRINT_PARAM(n);
-    PRINT_PARAM(m);
-    PRINT_PARAM(h);
-    PRINT_PARAM(w);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(templ), dev_dst, method);
+        cv::gpu::matchTemplate(loadMat(image), loadMat(templ), dev_dst, method); // both inputs now go through the loadMat helper; no ROI flag is passed in this test
         dev_dst.download(dst);
     );
 
     EXPECT_MAT_NEAR(dst_gold, dst, 5 * h * w * 1e-4);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate8U, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Range(1, 5), 
-                        testing::Values((int)CV_TM_SQDIFF, (int)CV_TM_SQDIFF_NORMED, (int)CV_TM_CCORR, (int)CV_TM_CCORR_NORMED, (int)CV_TM_CCOEFF, (int)CV_TM_CCOEFF_NORMED)));
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate8U, Combine(
+                        ALL_DEVICES,
+                        Range(1, 5), // channel count cn; gtest Range excludes the end value, so 1..4
+                        Values((int)cv::TM_SQDIFF, (int) cv::TM_SQDIFF_NORMED, (int) cv::TM_CCORR, (int) cv::TM_CCORR_NORMED, (int) cv::TM_CCOEFF, (int) cv::TM_CCOEFF_NORMED))); // same six methods as before, spelled cv::TM_* instead of CV_TM_*
 
-struct MatchTemplate32F : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+
+PARAM_TEST_CASE(MatchTemplate32F, cv::gpu::DeviceInfo, int, TemplateMethod)
 {
     cv::gpu::DeviceInfo devInfo;
     int cn;
@@ -3579,21 +3221,21 @@ struct MatchTemplate32F : testing::TestWithParam< std::tr1::tuple<cv::gpu::Devic
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        cn = std::tr1::get<1>(GetParam());
-        method = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        cn = GET_PARAM(1);
+        method = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         n = rng.uniform(30, 100);
         m = rng.uniform(30, 100);
         h = rng.uniform(5, n - 1);
         w = rng.uniform(5, m - 1);
 
-        image = cvtest::randomMat(rng, cv::Size(m, n), CV_MAKETYPE(CV_32F, cn), 0.001, 1.0, false);
-        templ = cvtest::randomMat(rng, cv::Size(w, h), CV_MAKETYPE(CV_32F, cn), 0.001, 1.0, false);
+        image = randomMat(rng, cv::Size(m, n), CV_MAKETYPE(CV_32F, cn), 0.001, 1.0, false);
+        templ = randomMat(rng, cv::Size(w, h), CV_MAKETYPE(CV_32F, cn), 0.001, 1.0, false);
 
         cv::matchTemplate(image, templ, dst_gold, method);
     }
@@ -3601,51 +3243,39 @@ struct MatchTemplate32F : testing::TestWithParam< std::tr1::tuple<cv::gpu::Devic
 
 TEST_P(MatchTemplate32F, Regression)
 {
-    const char* matchTemplateMethodStr = matchTemplateMethods[method];
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(cn);
-    PRINT_PARAM(matchTemplateMethodStr);
-    PRINT_PARAM(n);
-    PRINT_PARAM(m);
-    PRINT_PARAM(h);
-    PRINT_PARAM(w);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(templ), dev_dst, method);
+        cv::gpu::matchTemplate(loadMat(image), loadMat(templ), dev_dst, method); // both inputs now go through the loadMat helper; no ROI flag is passed in this test
         dev_dst.download(dst);
     );
 
     EXPECT_MAT_NEAR(dst_gold, dst, 0.25 * h * w * 1e-4);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate32F, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Range(1, 5), 
-                        testing::Values((int)CV_TM_SQDIFF, (int)CV_TM_CCORR)));
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate32F, Combine(
+                        ALL_DEVICES, 
+                        Range(1, 5), // channel count cn; gtest Range excludes the end value, so 1..4
+                        Values((int) cv::TM_SQDIFF, (int) cv::TM_CCORR))); // only SQDIFF and CCORR are instantiated for 32F, as in the previous instantiation
 
-struct MatchTemplateBlackSource : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+// Fixture for the MatchTemplateBlackSource tests; parameters are (device, template-matching method).
+PARAM_TEST_CASE(MatchTemplateBlackSource, cv::gpu::DeviceInfo, TemplateMethod)
 {
     cv::gpu::DeviceInfo devInfo;
     int method;
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        method = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0); // parameter 0: the device to run on
+        method = GET_PARAM(1); // parameter 1: the matching method, stored as a plain int
+
         cv::gpu::setDevice(devInfo.deviceID());
     }
 };
 
 TEST_P(MatchTemplateBlackSource, Accuracy)
 {
-    const char* matchTemplateMethodStr = matchTemplateMethods[method];
-
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(matchTemplateMethodStr);
-
     cv::Mat image = readImage("matchtemplate/black.png");
     ASSERT_FALSE(image.empty());
 
@@ -3658,7 +3288,7 @@ TEST_P(MatchTemplateBlackSource, Accuracy)
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, method);
+        cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), dev_dst, method);
         dev_dst.download(dst);
     );
 
@@ -3669,12 +3299,12 @@ TEST_P(MatchTemplateBlackSource, Accuracy)
     ASSERT_EQ(maxLocGold, maxLoc);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplateBlackSource, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values((int)CV_TM_CCOEFF_NORMED, (int)CV_TM_CCORR_NORMED)));
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplateBlackSource, Combine(
+                        ALL_DEVICES,
+                        Values((int) cv::TM_CCOEFF_NORMED, (int) cv::TM_CCORR_NORMED))); // same two normalised methods as before, spelled cv::TM_* instead of CV_TM_*
 
 
-struct MatchTemplate_CCOEF_NORMED : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std::pair<std::string, std::string> > >
+PARAM_TEST_CASE(MatchTemplate_CCOEF_NORMED, cv::gpu::DeviceInfo, std::pair<std::string, std::string>)
 {
     cv::gpu::DeviceInfo devInfo;
     std::string imageName;
@@ -3684,9 +3314,9 @@ struct MatchTemplate_CCOEF_NORMED : testing::TestWithParam< std::tr1::tuple<cv::
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        imageName = std::tr1::get<1>(GetParam()).first;
-        patternName = std::tr1::get<1>(GetParam()).second;
+        devInfo = GET_PARAM(0);
+        imageName = GET_PARAM(1).first;
+        patternName = GET_PARAM(1).second;
 
         image = readImage(imageName);
         ASSERT_FALSE(image.empty());
@@ -3698,10 +3328,6 @@ struct MatchTemplate_CCOEF_NORMED : testing::TestWithParam< std::tr1::tuple<cv::
 
 TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(imageName);
-    PRINT_PARAM(patternName);
-
     cv::Mat dstGold;
     cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED);
     double minValGold, maxValGold;
@@ -3711,7 +3337,7 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
     cv::Mat dst;
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED);
+        cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED);
         dev_dst.download(dst);
     );
 
@@ -3725,13 +3351,12 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
     ASSERT_GE(minVal, -1.);
 }
 
-
-INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")))));
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, Combine(
+                        ALL_DEVICES,
+                        Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png"))))); // pair is (image file, pattern file), read via .first/.second in SetUp
 
 
-struct MatchTemplate_CCOEF_NORMED_NoThrow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std::pair<std::string, std::string> > >
+PARAM_TEST_CASE(MatchTemplate_CCOEF_NORMED_NoThrow, cv::gpu::DeviceInfo, std::pair<std::string, std::string>)
 {
     cv::gpu::DeviceInfo devInfo;
     std::string imageName;
@@ -3741,9 +3366,9 @@ struct MatchTemplate_CCOEF_NORMED_NoThrow : testing::TestWithParam< std::tr1::tu
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        imageName = std::tr1::get<1>(GetParam()).first;
-        patternName = std::tr1::get<1>(GetParam()).second;
+        devInfo = GET_PARAM(0);
+        imageName = GET_PARAM(1).first;
+        patternName = GET_PARAM(1).second;
 
         image = readImage(imageName);
         ASSERT_FALSE(image.empty());
@@ -3755,10 +3380,6 @@ struct MatchTemplate_CCOEF_NORMED_NoThrow : testing::TestWithParam< std::tr1::tu
 
 TEST_P(MatchTemplate_CCOEF_NORMED_NoThrow, NoThrow)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(imageName);
-    PRINT_PARAM(patternName);
-
     cv::Mat dstGold;
     cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED);
     double minValGold, maxValGold;
@@ -3768,23 +3389,20 @@ TEST_P(MatchTemplate_CCOEF_NORMED_NoThrow, NoThrow)
     cv::Mat dst;
     ASSERT_NO_THROW(
         cv::gpu::GpuMat dev_dst;
-        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED);
+        cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED);
         dev_dst.download(dst);
     );
 
 }
 
-
-INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED_NoThrow, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(std::make_pair(std::string("matchtemplate/source-1.png"), std::string("matchtemplate/target-1.png")))));
-
-
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED_NoThrow, Combine(
+                        ALL_DEVICES,
+                        Values(std::make_pair(std::string("matchtemplate/source-1.png"), std::string("matchtemplate/target-1.png"))))); // pair is (image file, pattern file), read via .first/.second in SetUp
 
 ////////////////////////////////////////////////////////////////////////////
 // MulSpectrums
 
-struct MulSpectrums : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, DftFlags)
 {
     cv::gpu::DeviceInfo devInfo;
     int flag;
@@ -3793,23 +3411,20 @@ struct MulSpectrums : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInf
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        flag = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        flag = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
-        a = cvtest::randomMat(rng, cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)), CV_32FC2, 0.0, 10.0, false);
-        b = cvtest::randomMat(rng, a.size(), CV_32FC2, 0.0, 10.0, false);
+        a = randomMat(rng, cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)), CV_32FC2, 0.0, 10.0, false);
+        b = randomMat(rng, a.size(), CV_32FC2, 0.0, 10.0, false);
     }
 };
 
 TEST_P(MulSpectrums, Simple)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(flag);
-
     cv::Mat c_gold;
     cv::mulSpectrums(a, b, c_gold, flag, false);
     
@@ -3818,7 +3433,7 @@ TEST_P(MulSpectrums, Simple)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat d_c;
 
-        cv::gpu::mulSpectrums(cv::gpu::GpuMat(a), cv::gpu::GpuMat(b), d_c, flag, false);
+        cv::gpu::mulSpectrums(loadMat(a), loadMat(b), d_c, flag, false);
 
         d_c.download(c);
     );
@@ -3828,9 +3443,6 @@ TEST_P(MulSpectrums, Simple)
 
 TEST_P(MulSpectrums, Scaled)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(flag);
-
     float scale = 1.f / a.size().area();
 
     cv::Mat c_gold;
@@ -3842,7 +3454,7 @@ TEST_P(MulSpectrums, Scaled)
     ASSERT_NO_THROW(
         cv::gpu::GpuMat d_c;
 
-        cv::gpu::mulAndScaleSpectrums(cv::gpu::GpuMat(a), cv::gpu::GpuMat(b), d_c, flag, scale, false);
+        cv::gpu::mulAndScaleSpectrums(loadMat(a), loadMat(b), d_c, flag, scale, false);
 
         d_c.download(c);
     );
@@ -3850,14 +3462,14 @@ TEST_P(MulSpectrums, Scaled)
     EXPECT_MAT_NEAR(c_gold, c, 1e-4);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(0, (int)cv::DFT_ROWS)));
+INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, Combine(
+                        ALL_DEVICES, 
+                        Values(0, (int) cv::DFT_ROWS))); // flag values forwarded to mulSpectrums: no flag, and DFT_ROWS; unchanged by this patch
 
 ////////////////////////////////////////////////////////////////////////////
 // Dft
 
-struct Dft : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct Dft : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
 
@@ -3870,17 +3482,13 @@ struct Dft : testing::TestWithParam<cv::gpu::DeviceInfo>
 };
 
 
-static void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
+void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
 {
-    PRINT_PARAM(hint);
-    PRINT_PARAM(cols);
-    PRINT_PARAM(rows);
-    PRINT_PARAM(flags);
-    PRINT_PARAM(inplace);
+    SCOPED_TRACE(hint);
 
     cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-    cv::Mat a = cvtest::randomMat(rng, cv::Size(cols, rows), CV_32FC2, 0.0, 10.0, false);
+    cv::Mat a = randomMat(rng, cv::Size(cols, rows), CV_32FC2, 0.0, 10.0, false);
 
     cv::Mat b_gold;
     cv::dft(a, b_gold, flags);
@@ -3892,7 +3500,7 @@ static void testC2C(const std::string& hint, int cols, int rows, int flags, bool
         d_b_data.create(1, a.size().area(), CV_32FC2);
         d_b = cv::gpu::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
     }
-    cv::gpu::dft(cv::gpu::GpuMat(a), d_b, cv::Size(cols, rows), flags);
+    cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
 
     EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
     ASSERT_EQ(CV_32F, d_b.depth());
@@ -3902,8 +3510,6 @@ static void testC2C(const std::string& hint, int cols, int rows, int flags, bool
 
 TEST_P(Dft, C2C)
 {
-    PRINT_PARAM(devInfo);
-
     cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
     int cols = 2 + rng.next() % 100, rows = 2 + rng.next() % 100;
@@ -3930,16 +3536,13 @@ TEST_P(Dft, C2C)
     );
 }
 
-static void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
+void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
 {
-    PRINT_PARAM(hint);
-    PRINT_PARAM(cols);
-    PRINT_PARAM(rows);
-    PRINT_PARAM(inplace);
+    SCOPED_TRACE(hint);
     
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+    cv::RNG& rng = TS::ptr()->get_rng();
 
-    cv::Mat a = cvtest::randomMat(rng, cv::Size(cols, rows), CV_32FC1, 0.0, 10.0, false);
+    cv::Mat a = randomMat(rng, cv::Size(cols, rows), CV_32FC1, 0.0, 10.0, false);
 
     cv::gpu::GpuMat d_b, d_c;
     cv::gpu::GpuMat d_b_data, d_c_data;
@@ -3959,7 +3562,7 @@ static void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inp
         d_c = cv::gpu::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
     }
 
-    cv::gpu::dft(cv::gpu::GpuMat(a), d_b, cv::Size(cols, rows), 0);
+    cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
     cv::gpu::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
     
     EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
@@ -3973,9 +3576,7 @@ static void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inp
 
 TEST_P(Dft, R2CThenC2R)
 {
-    PRINT_PARAM(devInfo);
-
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+    cv::RNG& rng = TS::ptr()->get_rng();
 
     int cols = 2 + rng.next() % 100, rows = 2 + rng.next() % 100;
 
@@ -3998,12 +3599,13 @@ TEST_P(Dft, R2CThenC2R)
     );
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(ImgProc, Dft, ALL_DEVICES); // still a single device parameter; ALL_DEVICES replaces testing::ValuesIn(devices()) -- confirm equivalence against its definition
 
 ////////////////////////////////////////////////////////////////////////////
 // blend
 
-template <typename T> static void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold)
+template <typename T> 
+void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold)
 {
     result_gold.create(img1.size(), img1.type());
 
@@ -4016,6 +3618,7 @@ template <typename T> static void blendLinearGold(const cv::Mat& img1, const cv:
         const T* img1_row = img1.ptr<T>(y);
         const T* img2_row = img2.ptr<T>(y);
         T* result_gold_row = result_gold.ptr<T>(y);
+
         for (int x = 0; x < img1.cols * cn; ++x)
         {
             float w1 = weights1_row[x / cn];
@@ -4025,13 +3628,12 @@ template <typename T> static void blendLinearGold(const cv::Mat& img1, const cv:
     }
 }
 
-struct Blend : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(Blend, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
-    int depth;
-    int cn;
-
     int type;
+    bool useRoi;
+
     cv::Size size;
     cv::Mat img1;
     cv::Mat img2;
@@ -4042,22 +3644,22 @@ struct Blend : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        depth = std::tr1::get<1>(GetParam());
-        cn = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        type = CV_MAKETYPE(depth, cn);
+        cv::RNG& rng = TS::ptr()->get_rng();
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(200 + randInt(rng) % 1000, 200 + randInt(rng) % 1000);
 
-        size = cv::Size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000);
+        int depth = CV_MAT_DEPTH(type);
 
-        img1 = cvtest::randomMat(rng, size, type, 0.0, depth == CV_8U ? 255.0 : 1.0, false);
-        img2 = cvtest::randomMat(rng, size, type, 0.0, depth == CV_8U ? 255.0 : 1.0, false);
-        weights1 = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
-        weights2 = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
+        img1 = randomMat(rng, size, type, 0.0, depth == CV_8U ? 255.0 : 1.0, false);
+        img2 = randomMat(rng, size, type, 0.0, depth == CV_8U ? 255.0 : 1.0, false);
+        weights1 = randomMat(rng, size, CV_32F, 0, 1, false);
+        weights2 = randomMat(rng, size, CV_32F, 0, 1, false);
         
         if (depth == CV_8U)
             blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold);
@@ -4068,35 +3670,32 @@ struct Blend : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
 TEST_P(Blend, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     cv::Mat result;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat d_result;
 
-        cv::gpu::blendLinear(cv::gpu::GpuMat(img1), cv::gpu::GpuMat(img2), cv::gpu::GpuMat(weights1), cv::gpu::GpuMat(weights2), d_result);
+        cv::gpu::blendLinear(loadMat(img1, useRoi), loadMat(img2, useRoi), loadMat(weights1, useRoi), loadMat(weights2, useRoi), d_result); // all four inputs are loaded with the same useRoi setting
 
         d_result.download(result);
     );
 
-    EXPECT_MAT_NEAR(result_gold, result, depth == CV_8U ? 1.0 : 1e-5);
+    EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.0 : 1e-5); // the depth member was removed from the fixture, so depth is derived from type; tolerance is 1.0 for 8-bit, 1e-5 otherwise
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Blend, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(CV_8U, CV_32F),
-                        testing::Range(1, 5)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Blend, Combine(
+                        ALL_DEVICES,
+                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), // full Mat types replace the old (depth, channels) pair; 2-channel types are no longer instantiated
+                        USE_ROI)); // third parameter, read as useRoi in SetUp
 
 ////////////////////////////////////////////////////////
 // pyrDown
 
-struct PyrDown : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(PyrDown, cv::gpu::DeviceInfo, MatType, UseRoi)
 {    
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
     
     cv::Size size;
     cv::Mat src;
@@ -4105,33 +3704,30 @@ struct PyrDown : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, in
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 255.0, false);
+        src = randomMat(rng, size, type, 0.0, 255.0, false);
         
         cv::pyrDown(src, dst_gold);
     }
 };
 
 TEST_P(PyrDown, Accuracy)
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    
+{    
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat d_dst;
         
-        cv::gpu::pyrDown(cv::gpu::GpuMat(src), d_dst);
+        cv::gpu::pyrDown(loadMat(src, useRoi), d_dst);
         
         d_dst.download(dst);
     );
@@ -4140,24 +3736,23 @@ TEST_P(PyrDown, Accuracy)
     ASSERT_EQ(dst_gold.rows, dst.rows);
     ASSERT_EQ(dst_gold.type(), dst.type());
     
-    double err = cvtest::crossCorr(dst_gold, dst) / (cv::norm(dst_gold,cv::NORM_L2)*cv::norm(dst,cv::NORM_L2));
+    double err = crossCorr(dst_gold, dst) / (cv::norm(dst_gold, cv::NORM_L2) * cv::norm(dst, cv::NORM_L2));
     ASSERT_NEAR(err, 1., 1e-2);
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4,
-                                        CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4,
-                                        CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
-                                        CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4)));
+INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, Combine(
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4), // narrower than before: the 2-channel types and all CV_16S types are no longer instantiated
+                        USE_ROI)); // third parameter, read as useRoi in SetUp
 
 ////////////////////////////////////////////////////////
 // pyrUp
 
-struct PyrUp: testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(PyrUp, cv::gpu::DeviceInfo, MatType, UseRoi)
 {    
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
     
     cv::Size size;    
     cv::Mat src;
@@ -4166,33 +3761,30 @@ struct PyrUp: testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int>
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 255.0, false);
+        src = randomMat(rng, size, type, 0.0, 255.0, false);
         
         cv::pyrUp(src, dst_gold);
     }
 };
 
 TEST_P(PyrUp, Accuracy)
-{
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-    
+{    
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat d_dst;
         
-        cv::gpu::pyrUp(cv::gpu::GpuMat(src), d_dst);
+        cv::gpu::pyrUp(loadMat(src, useRoi), d_dst);
         
         d_dst.download(dst);
     );
@@ -4201,26 +3793,24 @@ TEST_P(PyrUp, Accuracy)
     ASSERT_EQ(dst_gold.rows, dst.rows);
     ASSERT_EQ(dst_gold.type(), dst.type());
 
-    double err = cvtest::crossCorr(dst_gold, dst) / (cv::norm(dst_gold,cv::NORM_L2)*cv::norm(dst,cv::NORM_L2));
+    double err = crossCorr(dst_gold, dst) / (cv::norm(dst_gold, cv::NORM_L2) * cv::norm(dst, cv::NORM_L2)); // unqualified, matching the PyrDown check in this file
     ASSERT_NEAR(err, 1., 1e-2);
 }
 
-
-INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4,
-                                        CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4,
-                                        CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
-                                        CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4)));
+INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, Combine(
+                        ALL_DEVICES, 
+                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////
 // Canny
 
-struct Canny : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, bool> >
+PARAM_TEST_CASE(Canny, cv::gpu::DeviceInfo, int, bool, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int apperture_size;
     bool L2gradient;
+    bool useRoi;
     
     cv::Mat img;
 
@@ -4231,9 +3821,10 @@ struct Canny : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        apperture_size = std::tr1::get<1>(GetParam());
-        L2gradient = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        apperture_size = GET_PARAM(1);
+        L2gradient = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
@@ -4249,16 +3840,12 @@ struct Canny : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
 
 TEST_P(Canny, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(apperture_size);
-    PRINT_PARAM(L2gradient);
-
     cv::Mat edges;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat d_edges;
 
-        cv::gpu::Canny(cv::gpu::GpuMat(img), d_edges, low_thresh, high_thresh, apperture_size, L2gradient);
+        cv::gpu::Canny(loadMat(img, useRoi), d_edges, low_thresh, high_thresh, apperture_size, L2gradient);
 
         d_edges.download(edges);
     );
@@ -4267,14 +3854,15 @@ TEST_P(Canny, Accuracy)
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
-                        testing::ValuesIn(devices()),
-                        testing::Values(3, 5),
-                        testing::Values(false, true)));
+                        DEVICES(cv::gpu::SHARED_ATOMICS),
+                        Values(3, 5),
+                        Values(false, true),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////
 // convolve
 
-struct Convolve: testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, int)
 {    
     cv::gpu::DeviceInfo devInfo;
     int ksize;
@@ -4287,34 +3875,31 @@ struct Convolve: testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, in
 
     virtual void SetUp()
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        ksize = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        ksize = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        src = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 255.0, false);
-        kernel = cvtest::randomMat(rng, cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0, false);
+        src = randomMat(rng, size, CV_32FC1, 0.0, 255.0, false);
+        kernel = randomMat(rng, cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0, false);
         
         cv::filter2D(src, dst_gold, CV_32F, kernel, cv::Point(-1, -1), 0, cv::BORDER_REPLICATE);
     }
 };
 
 TEST_P(Convolve, Accuracy)
-{
-    PRINT_PARAM(devInfo);
-    PRINT_PARAM(ksize);
-    
+{    
     cv::Mat dst;
 
     ASSERT_NO_THROW(
         cv::gpu::GpuMat d_dst;
 
-        cv::gpu::convolve(cv::gpu::GpuMat(src), cv::gpu::GpuMat(kernel), d_dst);
-        
+        cv::gpu::convolve(loadMat(src), loadMat(kernel), d_dst);
+
         d_dst.download(dst);
     );
 
@@ -4322,8 +3907,8 @@ TEST_P(Convolve, Accuracy)
 }
 
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::Values(3, 5, 7, 9, 11)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, Combine(
+                        ALL_DEVICES, 
+                        Values(3, 5, 7, 9, 11)));
 
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_main.cpp b/modules/gpu/test/test_main.cpp
index 05afdcdec..ae67c5349 100644
--- a/modules/gpu/test/test_main.cpp
+++ b/modules/gpu/test/test_main.cpp
@@ -42,8 +42,15 @@
 #include "test_precomp.hpp"
 
 #ifdef HAVE_CUDA
+
 #include <cuda_runtime_api.h>
 
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+using namespace cvtest;
+using namespace testing;
+
 void print_info()
 {    
     printf("\n");
@@ -67,18 +74,18 @@ void print_info()
 #   endif
 #endif
 
-    int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
+    int deviceCount = getCudaEnabledDeviceCount();
     int driver;
     cudaDriverGetVersion(&driver);
 
     printf("CUDA Driver  version: %d\n", driver);        
     printf("CUDA Runtime version: %d\n", CUDART_VERSION);    
-    printf("CUDA device count: %d\n\n", deviceCount);
-    
+    printf("CUDA device count: %d\n\n", deviceCount);    
 
     for (int i = 0; i < deviceCount; ++i)
     {
-        cv::gpu::DeviceInfo info(i);
+        DeviceInfo info(i);
+
         printf("Device %d:\n", i);
         printf("    Name: %s\n", info.name().c_str());
         printf("    Compute capability version: %d.%d\n", info.majorVersion(), info.minorVersion());
@@ -106,14 +113,14 @@ extern OutputLevel nvidiaTestOutputLevel;
 
 int main(int argc, char** argv)
 {
-    cvtest::TS::ptr()->init("gpu");
-    testing::InitGoogleTest(&argc, argv);
+    TS::ptr()->init("gpu");
+    InitGoogleTest(&argc, argv);
 
     const char* keys ="{ nvtest_output_level | nvtest_output_level | none | NVidia test verbosity level }";
 
-    cv::CommandLineParser parser(argc, (const char**)argv, keys);
+    CommandLineParser parser(argc, (const char**)argv, keys);
 
-    std::string outputLevel = parser.get<std::string>("nvtest_output_level", "none");
+    string outputLevel = parser.get<string>("nvtest_output_level", "none");
 
     if (outputLevel == "none")
         nvidiaTestOutputLevel = OutputLevelNone;
@@ -123,6 +130,7 @@ int main(int argc, char** argv)
         nvidiaTestOutputLevel = OutputLevelFull;
 
     print_info();
+
     return RUN_ALL_TESTS();
 }
 
diff --git a/modules/gpu/test/test_matop.cpp b/modules/gpu/test/test_matop.cpp
index 800fc88c7..dbd483df8 100644
--- a/modules/gpu/test/test_matop.cpp
+++ b/modules/gpu/test/test_matop.cpp
@@ -44,13 +44,17 @@
 
 #ifdef HAVE_CUDA
 
+using namespace cvtest;
+using namespace testing;
+
 ////////////////////////////////////////////////////////////////////////////////
 // merge
 
-struct Merge : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(Merge, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     std::vector<cv::Mat> src;
@@ -59,12 +63,13 @@ struct Merge : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int>
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
@@ -83,10 +88,6 @@ TEST_P(Merge, Accuracy)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     cv::Mat dst;
 
     ASSERT_NO_THROW(
@@ -94,7 +95,7 @@ TEST_P(Merge, Accuracy)
         cv::gpu::GpuMat dev_dst;
 
         for (size_t i = 0; i < src.size(); ++i)
-            dev_src.push_back(cv::gpu::GpuMat(src[i]));
+            dev_src.push_back(loadMat(src[i], useRoi));
 
         cv::gpu::merge(dev_src, dev_dst); 
 
@@ -104,17 +105,19 @@ TEST_P(Merge, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(MatOp, Merge, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::ValuesIn(all_types())));
+INSTANTIATE_TEST_CASE_P(MatOp, Merge, Combine(
+                        ALL_DEVICES, 
+                        ALL_TYPES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // split
 
-struct Split : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(Split, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
@@ -123,12 +126,13 @@ struct Split : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int>
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
@@ -143,17 +147,15 @@ TEST_P(Split, Accuracy)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     std::vector<cv::Mat> dst;
     
     ASSERT_NO_THROW(
         std::vector<cv::gpu::GpuMat> dev_dst;
-        cv::gpu::split(cv::gpu::GpuMat(src), dev_dst);
+
+        cv::gpu::split(loadMat(src, useRoi), dev_dst);
 
         dst.resize(dev_dst.size());
+
         for (size_t i = 0; i < dev_dst.size(); ++i)
             dev_dst[i].download(dst[i]);
     );
@@ -166,14 +168,15 @@ TEST_P(Split, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(MatOp, Split, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::ValuesIn(all_types())));
+INSTANTIATE_TEST_CASE_P(MatOp, Split, Combine(
+                        ALL_DEVICES, 
+                        ALL_TYPES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // split_merge_consistency
 
-struct SplitMerge : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(SplitMerge, cv::gpu::DeviceInfo, MatType)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
@@ -183,12 +186,12 @@ struct SplitMerge : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo,
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
@@ -202,17 +205,13 @@ TEST_P(SplitMerge, Consistency)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     cv::Mat final;
 
     ASSERT_NO_THROW(
         std::vector<cv::gpu::GpuMat> dev_vec;
         cv::gpu::GpuMat dev_final;
 
-        cv::gpu::split(cv::gpu::GpuMat(orig), dev_vec);    
+        cv::gpu::split(loadMat(orig), dev_vec);    
         cv::gpu::merge(dev_vec, dev_final);
 
         dev_final.download(final);
@@ -221,29 +220,31 @@ TEST_P(SplitMerge, Consistency)
     EXPECT_MAT_NEAR(orig, final, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(MatOp, SplitMerge, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::ValuesIn(all_types())));
+INSTANTIATE_TEST_CASE_P(MatOp, SplitMerge, Combine(
+                        ALL_DEVICES, 
+                        ALL_TYPES));
 
 ////////////////////////////////////////////////////////////////////////////////
 // setTo
 
-struct SetTo : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(SetTo, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat mat_gold;
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
@@ -256,16 +257,12 @@ TEST_P(SetTo, Zero)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    static cv::Scalar zero = cv::Scalar::all(0);
+    cv::Scalar zero = cv::Scalar::all(0);
 
     cv::Mat mat;
 
     ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_mat(mat_gold);
+        cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi);
 
         mat_gold.setTo(zero);
         dev_mat.setTo(zero);
@@ -281,11 +278,7 @@ TEST_P(SetTo, SameVal)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    static cv::Scalar s = cv::Scalar::all(1);
+    cv::Scalar s = cv::Scalar::all(1);
 
     cv::Mat mat;
 
@@ -306,16 +299,12 @@ TEST_P(SetTo, DifferentVal)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    static cv::Scalar s = cv::Scalar(1, 2, 3, 4);
+    cv::Scalar s = cv::Scalar(1, 2, 3, 4);
 
     cv::Mat mat;
 
     ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_mat(mat_gold);
+        cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi);
 
         mat_gold.setTo(s);
         dev_mat.setTo(s);
@@ -331,23 +320,18 @@ TEST_P(SetTo, Masked)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
-    static cv::Scalar s = cv::Scalar(1, 2, 3, 4);
-
+    cv::Scalar s = cv::Scalar(1, 2, 3, 4);
     
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-    cv::Mat mask = cvtest::randomMat(rng, mat_gold.size(), CV_8UC1, 0.0, 1.5, false);
+    cv::RNG& rng = TS::ptr()->get_rng();
+    cv::Mat mask = randomMat(rng, mat_gold.size(), CV_8UC1, 0.0, 1.5, false);
 
     cv::Mat mat;
 
     ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_mat(mat_gold);
+        cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi);
 
         mat_gold.setTo(s, mask);
-        dev_mat.setTo(s, cv::gpu::GpuMat(mask));
+        dev_mat.setTo(s, loadMat(mask, useRoi));
 
         dev_mat.download(mat);
     );
@@ -355,33 +339,36 @@ TEST_P(SetTo, Masked)
     EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::ValuesIn(all_types())));
+INSTANTIATE_TEST_CASE_P(MatOp, SetTo, Combine(
+                        ALL_DEVICES, 
+                        ALL_TYPES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // copyTo
 
-struct CopyTo : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+PARAM_TEST_CASE(CopyTo, cv::gpu::DeviceInfo, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int type;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        type = std::tr1::get<1>(GetParam());
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
-        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        src = randomMat(rng, size, type, 0.0, 127.0, false);
     }
 };
 
@@ -390,19 +377,14 @@ TEST_P(CopyTo, WithoutMask)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
-
     cv::Mat dst_gold;
     src.copyTo(dst_gold);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_src(src);
-
-        cv::gpu::GpuMat dev_dst;
+        cv::gpu::GpuMat dev_src = loadMat(src, useRoi);
+        cv::gpu::GpuMat dev_dst = loadMat(cv::Mat(src.size(), src.type(), cv::Scalar::all(0)), useRoi); // zero-filled, not a copy of src, so a copyTo that writes nothing fails the comparison
 
         dev_src.copyTo(dev_dst);
 
@@ -417,25 +399,22 @@ TEST_P(CopyTo, Masked)
     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(type);
-    PRINT_PARAM(size);
+    cv::RNG& rng = TS::ptr()->get_rng();
 
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+    cv::Mat mask = randomMat(rng, src.size(), CV_8UC1, 0.0, 2.0, false);
 
-    cv::Mat mask = cvtest::randomMat(rng, src.size(), CV_8UC1, 0.0, 2.0, false);
+    cv::Mat zeroMat(src.size(), src.type(), cv::Scalar::all(0));
 
-    cv::Mat dst_gold(src.size(), src.type(), cv::Scalar::all(0));
+    cv::Mat dst_gold = zeroMat.clone();
     src.copyTo(dst_gold, mask);
 
     cv::Mat dst;
 
     ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_src(src);
+        cv::gpu::GpuMat dev_src = loadMat(src, useRoi);
+        cv::gpu::GpuMat dev_dst = loadMat(zeroMat, useRoi);
 
-        cv::gpu::GpuMat dev_dst(src.size(), src.type(), cv::Scalar::all(0));
-
-        dev_src.copyTo(dev_dst, cv::gpu::GpuMat(mask));
+        dev_src.copyTo(dev_dst, loadMat(mask, useRoi));
 
         dev_dst.download(dst);
     );
@@ -443,35 +422,38 @@ TEST_P(CopyTo, Masked)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(MatOp, CopyTo, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::ValuesIn(all_types())));
+INSTANTIATE_TEST_CASE_P(MatOp, CopyTo, Combine(
+                        ALL_DEVICES, 
+                        ALL_TYPES,
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // convertTo
 
-struct ConvertTo : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+PARAM_TEST_CASE(ConvertTo, cv::gpu::DeviceInfo, MatType, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     int depth1;
     int depth2;
+    bool useRoi;
 
     cv::Size size;
     cv::Mat src;
 
     virtual void SetUp() 
     {
-        devInfo = std::tr1::get<0>(GetParam());
-        depth1 = std::tr1::get<1>(GetParam());
-        depth2 = std::tr1::get<2>(GetParam());
+        devInfo = GET_PARAM(0);
+        depth1 = GET_PARAM(1);
+        depth2 = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
         
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
 
-        src = cvtest::randomMat(rng, size, depth1, 0.0, 127.0, false);
+        src = randomMat(rng, size, depth1, 0.0, 127.0, false);
     }
 };
 
@@ -480,19 +462,13 @@ TEST_P(ConvertTo, WithoutScaling)
     if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
 
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(depth1);
-    PRINT_TYPE(depth2);
-    PRINT_PARAM(size);
-
     cv::Mat dst_gold;
     src.convertTo(dst_gold, depth2);
 
     cv::Mat dst;
     
     ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_src(src);
-
+        cv::gpu::GpuMat dev_src = loadMat(src, useRoi);
         cv::gpu::GpuMat dev_dst;
 
         dev_src.convertTo(dev_dst, depth2);
@@ -507,19 +483,11 @@ TEST_P(ConvertTo, WithScaling)
 {
     if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
         return;
-
-    PRINT_PARAM(devInfo);
-    PRINT_TYPE(depth1);
-    PRINT_TYPE(depth2);
-    PRINT_PARAM(size);
     
-    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+    cv::RNG& rng = TS::ptr()->get_rng();
 
     const double a = rng.uniform(0.0, 1.0);
     const double b = rng.uniform(-10.0, 10.0);
-    
-    PRINT_PARAM(a);
-    PRINT_PARAM(b);
 
     cv::Mat dst_gold;
     src.convertTo(dst_gold, depth2, a, b);
@@ -527,7 +495,7 @@ TEST_P(ConvertTo, WithScaling)
     cv::Mat dst;
     
     ASSERT_NO_THROW(
-        cv::gpu::GpuMat dev_src(src);
+        cv::gpu::GpuMat dev_src = loadMat(src, useRoi);
 
         cv::gpu::GpuMat dev_dst;
 
@@ -541,15 +509,16 @@ TEST_P(ConvertTo, WithScaling)
     EXPECT_MAT_NEAR(dst_gold, dst, eps);
 }
 
-INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine(
-                        testing::ValuesIn(devices()), 
-                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1)), 
-                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1))));
+INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, Combine(
+                        ALL_DEVICES, 
+                        TYPES(CV_8U, CV_64F, 1, 1),
+                        TYPES(CV_8U, CV_64F, 1, 1),
+                        USE_ROI));
 
 ////////////////////////////////////////////////////////////////////////////////
 // async
 
-struct Async : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct Async : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
 
@@ -564,7 +533,7 @@ struct Async : testing::TestWithParam<cv::gpu::DeviceInfo>
 
         cv::gpu::setDevice(devInfo.deviceID());
 
-        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        cv::RNG& rng = TS::ptr()->get_rng();
 
         int rows = rng.uniform(100, 200);
         int cols = rng.uniform(100, 200);
@@ -578,8 +547,6 @@ struct Async : testing::TestWithParam<cv::gpu::DeviceInfo>
 
 TEST_P(Async, Accuracy)
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat dst0, dst1;
     
     ASSERT_NO_THROW(
@@ -611,6 +578,6 @@ TEST_P(Async, Accuracy)
     EXPECT_MAT_NEAR(dst_gold1, dst1, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(MatOp, Async, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(MatOp, Async, ALL_DEVICES);
 
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_nvidia.cpp b/modules/gpu/test/test_nvidia.cpp
index 2620dd7f4..201723baa 100644
--- a/modules/gpu/test/test_nvidia.cpp
+++ b/modules/gpu/test/test_nvidia.cpp
@@ -43,6 +43,9 @@
 
 #ifdef HAVE_CUDA
 
+using namespace cvtest;
+using namespace testing;
+
 enum OutputLevel
 {
     OutputLevelNone,
@@ -62,27 +65,22 @@ bool nvidia_NCV_Haar_Cascade_Application(const std::string& test_data_path, Outp
 bool nvidia_NCV_Hypotheses_Filtration(const std::string& test_data_path, OutputLevel outputLevel);
 bool nvidia_NCV_Visualization(const std::string& test_data_path, OutputLevel outputLevel);
 
-struct NVidiaTest : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct NVidiaTest : TestWithParam<cv::gpu::DeviceInfo>
 {
-    static std::string path;
-
     cv::gpu::DeviceInfo devInfo;
 
-    static void SetUpTestCase() 
-    {
-        path = std::string(cvtest::TS::ptr()->get_data_path()) + "haarcascade/";
-    }
+    std::string path;
 
     virtual void SetUp() 
     {
         devInfo = GetParam();
 
         cv::gpu::setDevice(devInfo.deviceID());
+
+        path = std::string(TS::ptr()->get_data_path()) + "haarcascade/";
     }
 };
 
-std::string NVidiaTest::path;
-
 struct NPPST : NVidiaTest {};
 struct NCV : NVidiaTest {};
 
@@ -90,8 +88,6 @@ OutputLevel nvidiaTestOutputLevel = OutputLevelNone;
 
 TEST_P(NPPST, Integral) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -103,8 +99,6 @@ TEST_P(NPPST, Integral)
 
 TEST_P(NPPST, SquaredIntegral) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -116,8 +110,6 @@ TEST_P(NPPST, SquaredIntegral)
 
 TEST_P(NPPST, RectStdDev) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -129,8 +121,6 @@ TEST_P(NPPST, RectStdDev)
 
 TEST_P(NPPST, Resize) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -142,8 +132,6 @@ TEST_P(NPPST, Resize)
 
 TEST_P(NPPST, VectorOperations) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -155,8 +143,6 @@ TEST_P(NPPST, VectorOperations)
 
 TEST_P(NPPST, Transpose) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -168,8 +154,6 @@ TEST_P(NPPST, Transpose)
 
 TEST_P(NCV, VectorOperations) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -181,8 +165,6 @@ TEST_P(NCV, VectorOperations)
 
 TEST_P(NCV, HaarCascadeLoader) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -194,8 +176,6 @@ TEST_P(NCV, HaarCascadeLoader)
 
 TEST_P(NCV, HaarCascadeApplication) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -207,8 +187,6 @@ TEST_P(NCV, HaarCascadeApplication)
 
 TEST_P(NCV, HypothesesFiltration) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -220,8 +198,6 @@ TEST_P(NCV, HypothesesFiltration)
 
 TEST_P(NCV, Visualization) 
 {
-    PRINT_PARAM(devInfo);
-
     bool res;
 
     ASSERT_NO_THROW(
@@ -231,7 +207,7 @@ TEST_P(NCV, Visualization)
     ASSERT_TRUE(res);
 }
 
-INSTANTIATE_TEST_CASE_P(NVidia, NPPST, testing::ValuesIn(devices()));
-INSTANTIATE_TEST_CASE_P(NVidia, NCV, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(NVidia, NPPST, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(NVidia, NCV, ALL_DEVICES);
 
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp
index 5ddafaab3..f40fc7b3b 100644
--- a/modules/gpu/test/test_precomp.hpp
+++ b/modules/gpu/test/test_precomp.hpp
@@ -56,6 +56,7 @@
 #include "opencv2/calib3d/calib3d.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/ts/ts.hpp"
+#include "opencv2/ts/ts_perf.hpp"
 #include "opencv2/gpu/gpu.hpp"
 #include "test_gpu_base.hpp"
 
diff --git a/modules/gpu/test/test_video.cpp b/modules/gpu/test/test_video.cpp
index fe9d784fd..73b2ec4de 100644
--- a/modules/gpu/test/test_video.cpp
+++ b/modules/gpu/test/test_video.cpp
@@ -43,6 +43,9 @@
 
 #ifdef HAVE_CUDA
 
+using namespace cvtest;
+using namespace testing;
+
 //#define DUMP
 
 #define OPTICAL_FLOW_DUMP_FILE            "opticalflow/opticalflow_gold.bin"
@@ -50,7 +53,10 @@
 #define INTERPOLATE_FRAMES_DUMP_FILE      "opticalflow/interpolate_frames_gold.bin"
 #define INTERPOLATE_FRAMES_DUMP_FILE_CC20 "opticalflow/interpolate_frames_gold_cc20.bin"
 
-struct BroxOpticalFlow : testing::TestWithParam< cv::gpu::DeviceInfo >
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// BroxOpticalFlow
+
+struct BroxOpticalFlow : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
     
@@ -105,8 +111,6 @@ struct BroxOpticalFlow : testing::TestWithParam< cv::gpu::DeviceInfo >
 
 TEST_P(BroxOpticalFlow, Regression)
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat u;
     cv::Mat v;
 
@@ -149,9 +153,12 @@ TEST_P(BroxOpticalFlow, Regression)
 #endif
 }
 
-INSTANTIATE_TEST_CASE_P(Video, BroxOpticalFlow, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Video, BroxOpticalFlow, ALL_DEVICES);
 
-struct InterpolateFrames : testing::TestWithParam< cv::gpu::DeviceInfo >
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// InterpolateFrames
+
+struct InterpolateFrames : TestWithParam<cv::gpu::DeviceInfo>
 {
     cv::gpu::DeviceInfo devInfo;
     
@@ -200,8 +207,6 @@ struct InterpolateFrames : testing::TestWithParam< cv::gpu::DeviceInfo >
 
 TEST_P(InterpolateFrames, Regression)
 {
-    PRINT_PARAM(devInfo);
-
     cv::Mat newFrame;
 
     cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, 
@@ -246,6 +251,6 @@ TEST_P(InterpolateFrames, Regression)
 #endif
 }
 
-INSTANTIATE_TEST_CASE_P(Video, InterpolateFrames, testing::ValuesIn(devices()));
+INSTANTIATE_TEST_CASE_P(Video, InterpolateFrames, ALL_DEVICES);
 
 #endif