From 1b00a3ed54f1ce2000418229357e04ddebaaadcd Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:35:54 +0400
Subject: [PATCH 01/49] gpucodec module for video decoding/encoding

---
 cmake/OpenCVDetectCUDA.cmake                  |   3 +
 modules/gpu/CMakeLists.txt                    |  13 -
 modules/gpu/doc/video.rst                     | 447 ------------------
 modules/gpu/include/opencv2/gpu.hpp           | 205 --------
 modules/gpu/perf/perf_video.cpp               | 100 ----
 modules/gpu/src/cuda/NV12ToARGB.cu            | 201 --------
 modules/gpu/src/cuda/rgb_to_yv12.cu           | 175 -------
 modules/gpu/src/precomp.hpp                   |  14 -
 modules/gpu/src/thread_wrappers.cpp           | 254 ----------
 modules/gpu/src/video_decoder.h               | 116 -----
 modules/gpucodec/CMakeLists.txt               |  29 ++
 modules/gpucodec/doc/gpucodec.rst             |   9 +
 modules/gpucodec/doc/videodec.rst             | 234 +++++++++
 modules/gpucodec/doc/videoenc.rst             | 219 +++++++++
 modules/gpucodec/include/opencv2/gpucodec.hpp | 265 +++++++++++
 modules/gpucodec/perf/perf_main.cpp           |  47 ++
 modules/gpucodec/perf/perf_precomp.cpp        |  43 ++
 modules/gpucodec/perf/perf_precomp.hpp        |  64 +++
 modules/gpucodec/perf/perf_video.cpp          | 162 +++++++
 modules/gpucodec/src/cuda/nv12_to_rgb.cu      | 193 ++++++++
 modules/gpucodec/src/cuda/rgb_to_yv12.cu      | 170 +++++++
 .../src/cuvid_video_source.cpp                |  11 +-
 .../src/cuvid_video_source.h                  |  66 ++-
 .../src/ffmpeg_video_source.cpp               |  22 +-
 .../src/ffmpeg_video_source.h                 |  56 +--
 modules/{gpu => gpucodec}/src/frame_queue.cpp |  10 +-
 modules/{gpu => gpucodec}/src/frame_queue.h   |  76 ++-
 modules/gpucodec/src/precomp.cpp              |  43 ++
 modules/gpucodec/src/precomp.hpp              |  79 ++++
 modules/gpucodec/src/thread.cpp               | 174 +++++++
 .../src/thread.h}                             |  71 +--
 .../{gpu => gpucodec}/src/video_decoder.cpp   |   7 +-
 modules/gpucodec/src/video_decoder.h          | 111 +++++
 .../{gpu => gpucodec}/src/video_parser.cpp    |   6 +-
 modules/{gpu => gpucodec}/src/video_parser.h  |  84 ++--
 .../{gpu => gpucodec}/src/video_reader.cpp    |  57 +--
 .../{gpu => gpucodec}/src/video_writer.cpp    |  44 +-
 modules/gpucodec/test/test_main.cpp           |  45 ++
 modules/gpucodec/test/test_precomp.cpp        |  43 ++
 modules/gpucodec/test/test_precomp.hpp        |  60 +++
 modules/{gpu => gpucodec}/test/test_video.cpp |  54 +--
 modules/superres/CMakeLists.txt               |   2 +-
 modules/superres/src/frame_source.cpp         |   6 +-
 modules/superres/src/precomp.hpp              |   4 +
 samples/gpu/CMakeLists.txt                    |   3 +-
 samples/gpu/video_reader.cpp                  |   8 +-
 samples/gpu/video_writer.cpp                  |   8 +-
 47 files changed, 2247 insertions(+), 1866 deletions(-)
 delete mode 100644 modules/gpu/src/cuda/NV12ToARGB.cu
 delete mode 100644 modules/gpu/src/cuda/rgb_to_yv12.cu
 delete mode 100644 modules/gpu/src/thread_wrappers.cpp
 delete mode 100644 modules/gpu/src/video_decoder.h
 create mode 100644 modules/gpucodec/CMakeLists.txt
 create mode 100644 modules/gpucodec/doc/gpucodec.rst
 create mode 100644 modules/gpucodec/doc/videodec.rst
 create mode 100644 modules/gpucodec/doc/videoenc.rst
 create mode 100644 modules/gpucodec/include/opencv2/gpucodec.hpp
 create mode 100644 modules/gpucodec/perf/perf_main.cpp
 create mode 100644 modules/gpucodec/perf/perf_precomp.cpp
 create mode 100644 modules/gpucodec/perf/perf_precomp.hpp
 create mode 100644 modules/gpucodec/perf/perf_video.cpp
 create mode 100644 modules/gpucodec/src/cuda/nv12_to_rgb.cu
 create mode 100644 modules/gpucodec/src/cuda/rgb_to_yv12.cu
 rename modules/{gpu => gpucodec}/src/cuvid_video_source.cpp (96%)
 rename modules/{gpu => gpucodec}/src/cuvid_video_source.h (61%)
 rename modules/{gpu => gpucodec}/src/ffmpeg_video_source.cpp (94%)
 rename modules/{gpu => gpucodec}/src/ffmpeg_video_source.h (69%)
 rename modules/{gpu => gpucodec}/src/frame_queue.cpp (94%)
 rename modules/{gpu => gpucodec}/src/frame_queue.h (55%)
 create mode 100644 modules/gpucodec/src/precomp.cpp
 create mode 100644 modules/gpucodec/src/precomp.hpp
 create mode 100644 modules/gpucodec/src/thread.cpp
 rename modules/{gpu/src/thread_wrappers.h => gpucodec/src/thread.h} (61%)
 rename modules/{gpu => gpucodec}/src/video_decoder.cpp (97%)
 create mode 100644 modules/gpucodec/src/video_decoder.h
 rename modules/{gpu => gpucodec}/src/video_parser.cpp (98%)
 rename modules/{gpu => gpucodec}/src/video_parser.h (54%)
 rename modules/{gpu => gpucodec}/src/video_reader.cpp (89%)
 rename modules/{gpu => gpucodec}/src/video_writer.cpp (97%)
 create mode 100644 modules/gpucodec/test/test_main.cpp
 create mode 100644 modules/gpucodec/test/test_precomp.cpp
 create mode 100644 modules/gpucodec/test/test_precomp.hpp
 rename modules/{gpu => gpucodec}/test/test_video.cpp (75%)

diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake
index f3d101ab2..f1861fba7 100644
--- a/cmake/OpenCVDetectCUDA.cmake
+++ b/cmake/OpenCVDetectCUDA.cmake
@@ -28,6 +28,9 @@ if(CUDA_FOUND)
 
   if(WITH_NVCUVID)
     find_cuda_helper_libs(nvcuvid)
+    if(WIN32)
+      find_cuda_helper_libs(nvcuvenc)
+    endif()
     set(HAVE_NVCUVID 1)
   endif()
 
diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index f01a23b84..6f2f1145e 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -39,19 +39,6 @@ if(HAVE_CUDA)
   ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda})
 
   set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
-
-  if(WITH_NVCUVID)
-    set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY})
-  endif()
-
-  if(WIN32)
-    find_cuda_helper_libs(nvcuvenc)
-    set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvenc_LIBRARY})
-  endif()
-
-  if(WITH_FFMPEG)
-    set(cuda_link_libs ${cuda_link_libs} ${HIGHGUI_LIBRARIES})
-  endif()
 else()
   set(lib_cuda "")
   set(cuda_objs "")
diff --git a/modules/gpu/doc/video.rst b/modules/gpu/doc/video.rst
index f96410037..bb7c8263e 100644
--- a/modules/gpu/doc/video.rst
+++ b/modules/gpu/doc/video.rst
@@ -687,453 +687,6 @@ Releases all inner buffer's memory.
 
 
 
-gpu::VideoWriter_GPU
----------------------
-Video writer class.
-
-.. ocv:class:: gpu::VideoWriter_GPU
-
-The class uses H264 video codec.
-
-.. note:: Currently only Windows platform is supported.
-
-
-
-gpu::VideoWriter_GPU::VideoWriter_GPU
--------------------------------------
-Constructors.
-
-.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU()
-.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR)
-.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR)
-.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR)
-.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR)
-
-    :param fileName: Name of the output video file. Only AVI file format is supported.
-
-    :param frameSize: Size of the input video frames.
-
-    :param fps: Framerate of the created video stream.
-
-    :param params: Encoder parameters. See :ocv:struct:`gpu::VideoWriter_GPU::EncoderParams` .
-
-    :param format: Surface format of input frames ( ``SF_UYVY`` , ``SF_YUY2`` , ``SF_YV12`` , ``SF_NV12`` , ``SF_IYUV`` , ``SF_BGR`` or ``SF_GRAY``). BGR or gray frames will be converted to YV12 format before encoding, frames with other formats will be used as is.
-
-    :param encoderCallback: Callbacks for video encoder. See :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` . Use it if you want to work with raw video stream.
-
-The constructors initialize video writer. FFMPEG is used to write videos. User can implement own multiplexing with :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` .
-
-
-
-gpu::VideoWriter_GPU::open
---------------------------
-Initializes or reinitializes video writer.
-
-.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR)
-.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR)
-.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR)
-.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR)
-
-The method opens video writer. Parameters are the same as in the constructor :ocv:func:`gpu::VideoWriter_GPU::VideoWriter_GPU` . The method throws :ocv:class:`Exception` if error occurs.
-
-
-
-gpu::VideoWriter_GPU::isOpened
-------------------------------
-Returns true if video writer has been successfully initialized.
-
-.. ocv:function:: bool gpu::VideoWriter_GPU::isOpened() const
-
-
-
-gpu::VideoWriter_GPU::close
----------------------------
-Releases the video writer.
-
-.. ocv:function:: void gpu::VideoWriter_GPU::close()
-
-
-
-gpu::VideoWriter_GPU::write
----------------------------
-Writes the next video frame.
-
-.. ocv:function:: void gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame = false)
-
-    :param image: The written frame.
-
-    :param lastFrame: Indicates that it is end of stream. The parameter can be ignored.
-
-The method write the specified image to video file. The image must have the same size and the same surface format as has been specified when opening the video writer.
-
-
-
-gpu::VideoWriter_GPU::EncoderParams
------------------------------------
-.. ocv:struct:: gpu::VideoWriter_GPU::EncoderParams
-
-Different parameters for CUDA video encoder. ::
-
-    struct EncoderParams
-    {
-        int       P_Interval;      //    NVVE_P_INTERVAL,
-        int       IDR_Period;      //    NVVE_IDR_PERIOD,
-        int       DynamicGOP;      //    NVVE_DYNAMIC_GOP,
-        int       RCType;          //    NVVE_RC_TYPE,
-        int       AvgBitrate;      //    NVVE_AVG_BITRATE,
-        int       PeakBitrate;     //    NVVE_PEAK_BITRATE,
-        int       QP_Level_Intra;  //    NVVE_QP_LEVEL_INTRA,
-        int       QP_Level_InterP; //    NVVE_QP_LEVEL_INTER_P,
-        int       QP_Level_InterB; //    NVVE_QP_LEVEL_INTER_B,
-        int       DeblockMode;     //    NVVE_DEBLOCK_MODE,
-        int       ProfileLevel;    //    NVVE_PROFILE_LEVEL,
-        int       ForceIntra;      //    NVVE_FORCE_INTRA,
-        int       ForceIDR;        //    NVVE_FORCE_IDR,
-        int       ClearStat;       //    NVVE_CLEAR_STAT,
-        int       DIMode;          //    NVVE_SET_DEINTERLACE,
-        int       Presets;         //    NVVE_PRESETS,
-        int       DisableCabac;    //    NVVE_DISABLE_CABAC,
-        int       NaluFramingType; //    NVVE_CONFIGURE_NALU_FRAMING_TYPE
-        int       DisableSPSPPS;   //    NVVE_DISABLE_SPS_PPS
-
-        EncoderParams();
-        explicit EncoderParams(const String& configFile);
-
-        void load(const String& configFile);
-        void save(const String& configFile) const;
-    };
-
-
-
-gpu::VideoWriter_GPU::EncoderParams::EncoderParams
---------------------------------------------------
-Constructors.
-
-.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams()
-.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const String& configFile)
-
-    :param configFile: Config file name.
-
-Creates default parameters or reads parameters from config file.
-
-
-
-gpu::VideoWriter_GPU::EncoderParams::load
------------------------------------------
-Reads parameters from config file.
-
-.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::load(const String& configFile)
-
-    :param configFile: Config file name.
-
-
-
-gpu::VideoWriter_GPU::EncoderParams::save
------------------------------------------
-Saves parameters to config file.
-
-.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::save(const String& configFile) const
-
-    :param configFile: Config file name.
-
-
-
-gpu::VideoWriter_GPU::EncoderCallBack
--------------------------------------
-.. ocv:class:: gpu::VideoWriter_GPU::EncoderCallBack
-
-Callbacks for CUDA video encoder. ::
-
-    class EncoderCallBack
-    {
-    public:
-        enum PicType
-        {
-            IFRAME = 1,
-            PFRAME = 2,
-            BFRAME = 3
-        };
-
-        virtual ~EncoderCallBack() {}
-
-        virtual unsigned char* acquireBitStream(int* bufferSize) = 0;
-        virtual void releaseBitStream(unsigned char* data, int size) = 0;
-        virtual void onBeginFrame(int frameNumber, PicType picType) = 0;
-        virtual void onEndFrame(int frameNumber, PicType picType) = 0;
-    };
-
-
-
-gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream
--------------------------------------------------------
-Callback function to signal the start of bitstream that is to be encoded.
-
-.. ocv:function:: virtual uchar* gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream(int* bufferSize) = 0
-
-Callback must allocate buffer for CUDA encoder and return pointer to it and it's size.
-
-
-
-gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream
--------------------------------------------------------
-Callback function to signal that the encoded bitstream is ready to be written to file.
-
-.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream(unsigned char* data, int size) = 0
-
-
-
-gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame
----------------------------------------------------
-Callback function to signal that the encoding operation on the frame has started.
-
-.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame(int frameNumber, PicType picType) = 0
-
-    :param picType: Specify frame type (I-Frame, P-Frame or B-Frame).
-
-
-
-gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame
--------------------------------------------------
-Callback function signals that the encoding operation on the frame has finished.
-
-.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame(int frameNumber, PicType picType) = 0
-
-    :param picType: Specify frame type (I-Frame, P-Frame or B-Frame).
-
-
-
-gpu::VideoReader_GPU
---------------------
-Class for reading video from files.
-
-.. ocv:class:: gpu::VideoReader_GPU
-
-.. note:: Currently only Windows and Linux platforms are supported.
-
-
-
-gpu::VideoReader_GPU::Codec
----------------------------
-
-Video codecs supported by :ocv:class:`gpu::VideoReader_GPU` .
-
-.. ocv:enum:: gpu::VideoReader_GPU::Codec
-
-  .. ocv:emember:: MPEG1 = 0
-  .. ocv:emember:: MPEG2
-  .. ocv:emember:: MPEG4
-  .. ocv:emember:: VC1
-  .. ocv:emember:: H264
-  .. ocv:emember:: JPEG
-  .. ocv:emember:: H264_SVC
-  .. ocv:emember:: H264_MVC
-
-  .. ocv:emember:: Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V'))
-
-        Y,U,V (4:2:0)
-
-  .. ocv:emember:: Uncompressed_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2'))
-
-        Y,V,U (4:2:0)
-
-  .. ocv:emember:: Uncompressed_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2'))
-
-        Y,UV  (4:2:0)
-
-  .. ocv:emember:: Uncompressed_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V'))
-
-        YUYV/YUY2 (4:2:2)
-
-  .. ocv:emember:: Uncompressed_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))
-
-        UYVY (4:2:2)
-
-
-gpu::VideoReader_GPU::ChromaFormat
-----------------------------------
-
-Chroma formats supported by :ocv:class:`gpu::VideoReader_GPU` .
-
-.. ocv:enum:: gpu::VideoReader_GPU::ChromaFormat
-
-  .. ocv:emember:: Monochrome = 0
-  .. ocv:emember:: YUV420
-  .. ocv:emember:: YUV422
-  .. ocv:emember:: YUV444
-
-
-gpu::VideoReader_GPU::FormatInfo
---------------------------------
-.. ocv:struct:: gpu::VideoReader_GPU::FormatInfo
-
-Struct providing information about video file format. ::
-
-    struct FormatInfo
-    {
-        Codec codec;
-        ChromaFormat chromaFormat;
-        int width;
-        int height;
-    };
-
-
-gpu::VideoReader_GPU::VideoReader_GPU
--------------------------------------
-Constructors.
-
-.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU()
-.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const String& filename)
-.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const cv::Ptr<VideoSource>& source)
-
-    :param filename: Name of the input video file.
-
-    :param source: Video file parser implemented by user.
-
-The constructors initialize video reader. FFMPEG is used to read videos. User can implement own demultiplexing with :ocv:class:`gpu::VideoReader_GPU::VideoSource` .
-
-
-
-gpu::VideoReader_GPU::open
---------------------------
-Initializes or reinitializes video reader.
-
-.. ocv:function:: void gpu::VideoReader_GPU::open(const String& filename)
-.. ocv:function:: void gpu::VideoReader_GPU::open(const cv::Ptr<VideoSource>& source)
-
-The method opens video reader. Parameters are the same as in the constructor :ocv:func:`gpu::VideoReader_GPU::VideoReader_GPU` . The method throws :ocv:class:`Exception` if error occurs.
-
-
-
-gpu::VideoReader_GPU::isOpened
-------------------------------
-Returns true if video reader has been successfully initialized.
-
-.. ocv:function:: bool gpu::VideoReader_GPU::isOpened() const
-
-
-
-gpu::VideoReader_GPU::close
----------------------------
-Releases the video reader.
-
-.. ocv:function:: void gpu::VideoReader_GPU::close()
-
-
-
-gpu::VideoReader_GPU::read
---------------------------
-Grabs, decodes and returns the next video frame.
-
-.. ocv:function:: bool gpu::VideoReader_GPU::read(GpuMat& image)
-
-If no frames has been grabbed (there are no more frames in video file), the methods return ``false`` . The method throws :ocv:class:`Exception` if error occurs.
-
-
-
-gpu::VideoReader_GPU::format
-----------------------------
-Returns information about video file format.
-
-.. ocv:function:: FormatInfo gpu::VideoReader_GPU::format() const
-
-The method throws :ocv:class:`Exception` if video reader wasn't initialized.
-
-
-
-gpu::VideoReader_GPU::dumpFormat
---------------------------------
-Dump information about video file format to specified stream.
-
-.. ocv:function:: void gpu::VideoReader_GPU::dumpFormat(std::ostream& st)
-
-    :param st: Output stream.
-
-The method throws :ocv:class:`Exception` if video reader wasn't initialized.
-
-
-
-gpu::VideoReader_GPU::VideoSource
------------------------------------
-.. ocv:class:: gpu::VideoReader_GPU::VideoSource
-
-Interface for video demultiplexing. ::
-
-    class VideoSource
-    {
-    public:
-        VideoSource();
-        virtual ~VideoSource() {}
-
-        virtual FormatInfo format() const = 0;
-        virtual void start() = 0;
-        virtual void stop() = 0;
-        virtual bool isStarted() const = 0;
-        virtual bool hasError() const = 0;
-
-    protected:
-        bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream = false);
-    };
-
-User can implement own demultiplexing by implementing this interface.
-
-
-
-gpu::VideoReader_GPU::VideoSource::format
------------------------------------------
-Returns information about video file format.
-
-.. ocv:function:: virtual FormatInfo gpu::VideoReader_GPU::VideoSource::format() const = 0
-
-
-
-gpu::VideoReader_GPU::VideoSource::start
-----------------------------------------
-Starts processing.
-
-.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::start() = 0
-
-Implementation must create own thread with video processing and call periodic :ocv:func:`gpu::VideoReader_GPU::VideoSource::parseVideoData` .
-
-
-
-gpu::VideoReader_GPU::VideoSource::stop
----------------------------------------
-Stops processing.
-
-.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::stop() = 0
-
-
-
-gpu::VideoReader_GPU::VideoSource::isStarted
---------------------------------------------
-Returns ``true`` if processing was successfully started.
-
-.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::isStarted() const = 0
-
-
-
-gpu::VideoReader_GPU::VideoSource::hasError
--------------------------------------------
-Returns ``true`` if error occured during processing.
-
-.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::hasError() const = 0
-
-
-
-gpu::VideoReader_GPU::VideoSource::parseVideoData
--------------------------------------------------
-Parse next video frame. Implementation must call this method after new frame was grabbed.
-
-.. ocv:function:: bool gpu::VideoReader_GPU::VideoSource::parseVideoData(const uchar* data, size_t size, bool endOfStream = false)
-
-    :param data: Pointer to frame data. Can be ``NULL`` if ``endOfStream`` if ``true`` .
-
-    :param size: Size in bytes of current frame.
-
-    :param endOfStream: Indicates that it is end of stream.
-
-
-
 .. [Brox2004] T. Brox, A. Bruhn, N. Papenberg, J. Weickert. *High accuracy optical flow estimation based on a theory for warping*. ECCV 2004.
 .. [FGD2003] Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian. *Foreground Object Detection from Videos Containing Complex Background*. ACM MM2003 9p, 2003.
 .. [MOG2001] P. KadewTraKuPong and R. Bowden. *An improved adaptive background mixture model for real-time tracking with shadow detection*. Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 84de397dc..0b13fc01d 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -2156,211 +2156,6 @@ private:
     GpuMat buf_;
 };
 
-////////////////////////////////// Video Encoding //////////////////////////////////
-
-// Works only under Windows
-// Supports olny H264 video codec and AVI files
-class CV_EXPORTS VideoWriter_GPU
-{
-public:
-    struct EncoderParams;
-
-    // Callbacks for video encoder, use it if you want to work with raw video stream
-    class EncoderCallBack;
-
-    enum SurfaceFormat
-    {
-        SF_UYVY = 0,
-        SF_YUY2,
-        SF_YV12,
-        SF_NV12,
-        SF_IYUV,
-        SF_BGR,
-        SF_GRAY = SF_BGR
-    };
-
-    VideoWriter_GPU();
-    VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
-    VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
-    VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
-    VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
-    ~VideoWriter_GPU();
-
-    // all methods throws cv::Exception if error occurs
-    void open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
-    void open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
-    void open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
-    void open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
-
-    bool isOpened() const;
-    void close();
-
-    void write(const cv::gpu::GpuMat& image, bool lastFrame = false);
-
-    struct CV_EXPORTS EncoderParams
-    {
-        int       P_Interval;      //    NVVE_P_INTERVAL,
-        int       IDR_Period;      //    NVVE_IDR_PERIOD,
-        int       DynamicGOP;      //    NVVE_DYNAMIC_GOP,
-        int       RCType;          //    NVVE_RC_TYPE,
-        int       AvgBitrate;      //    NVVE_AVG_BITRATE,
-        int       PeakBitrate;     //    NVVE_PEAK_BITRATE,
-        int       QP_Level_Intra;  //    NVVE_QP_LEVEL_INTRA,
-        int       QP_Level_InterP; //    NVVE_QP_LEVEL_INTER_P,
-        int       QP_Level_InterB; //    NVVE_QP_LEVEL_INTER_B,
-        int       DeblockMode;     //    NVVE_DEBLOCK_MODE,
-        int       ProfileLevel;    //    NVVE_PROFILE_LEVEL,
-        int       ForceIntra;      //    NVVE_FORCE_INTRA,
-        int       ForceIDR;        //    NVVE_FORCE_IDR,
-        int       ClearStat;       //    NVVE_CLEAR_STAT,
-        int       DIMode;          //    NVVE_SET_DEINTERLACE,
-        int       Presets;         //    NVVE_PRESETS,
-        int       DisableCabac;    //    NVVE_DISABLE_CABAC,
-        int       NaluFramingType; //    NVVE_CONFIGURE_NALU_FRAMING_TYPE
-        int       DisableSPSPPS;   //    NVVE_DISABLE_SPS_PPS
-
-        EncoderParams();
-        explicit EncoderParams(const String& configFile);
-
-        void load(const String& configFile);
-        void save(const String& configFile) const;
-    };
-
-    EncoderParams getParams() const;
-
-    class CV_EXPORTS EncoderCallBack
-    {
-    public:
-        enum PicType
-        {
-            IFRAME = 1,
-            PFRAME = 2,
-            BFRAME = 3
-        };
-
-        virtual ~EncoderCallBack() {}
-
-        // callback function to signal the start of bitstream that is to be encoded
-        // must return pointer to buffer
-        virtual uchar* acquireBitStream(int* bufferSize) = 0;
-
-        // callback function to signal that the encoded bitstream is ready to be written to file
-        virtual void releaseBitStream(unsigned char* data, int size) = 0;
-
-        // callback function to signal that the encoding operation on the frame has started
-        virtual void onBeginFrame(int frameNumber, PicType picType) = 0;
-
-        // callback function signals that the encoding operation on the frame has finished
-        virtual void onEndFrame(int frameNumber, PicType picType) = 0;
-    };
-
-private:
-    VideoWriter_GPU(const VideoWriter_GPU&);
-    VideoWriter_GPU& operator=(const VideoWriter_GPU&);
-
-    class Impl;
-    std::auto_ptr<Impl> impl_;
-};
-
-
-////////////////////////////////// Video Decoding //////////////////////////////////////////
-
-namespace detail
-{
-    class FrameQueue;
-    class VideoParser;
-}
-
-class CV_EXPORTS VideoReader_GPU
-{
-public:
-    enum Codec
-    {
-        MPEG1 = 0,
-        MPEG2,
-        MPEG4,
-        VC1,
-        H264,
-        JPEG,
-        H264_SVC,
-        H264_MVC,
-
-        Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   // Y,U,V (4:2:0)
-        Uncompressed_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   // Y,V,U (4:2:0)
-        Uncompressed_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   // Y,UV  (4:2:0)
-        Uncompressed_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   // YUYV/YUY2 (4:2:2)
-        Uncompressed_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')),   // UYVY (4:2:2)
-    };
-
-    enum ChromaFormat
-    {
-        Monochrome=0,
-        YUV420,
-        YUV422,
-        YUV444,
-    };
-
-    struct FormatInfo
-    {
-        Codec codec;
-        ChromaFormat chromaFormat;
-        int width;
-        int height;
-    };
-
-    class VideoSource;
-
-    VideoReader_GPU();
-    explicit VideoReader_GPU(const String& filename);
-    explicit VideoReader_GPU(const cv::Ptr<VideoSource>& source);
-
-    ~VideoReader_GPU();
-
-    void open(const String& filename);
-    void open(const cv::Ptr<VideoSource>& source);
-    bool isOpened() const;
-
-    void close();
-
-    bool read(GpuMat& image);
-
-    FormatInfo format() const;
-    void dumpFormat(std::ostream& st);
-
-    class CV_EXPORTS VideoSource
-    {
-    public:
-        VideoSource() : frameQueue_(0), videoParser_(0) {}
-        virtual ~VideoSource() {}
-
-        virtual FormatInfo format() const = 0;
-        virtual void start() = 0;
-        virtual void stop() = 0;
-        virtual bool isStarted() const = 0;
-        virtual bool hasError() const = 0;
-
-        void setFrameQueue(detail::FrameQueue* frameQueue) { frameQueue_ = frameQueue; }
-        void setVideoParser(detail::VideoParser* videoParser) { videoParser_ = videoParser; }
-
-    protected:
-        bool parseVideoData(const uchar* data, size_t size, bool endOfStream = false);
-
-    private:
-        VideoSource(const VideoSource&);
-        VideoSource& operator =(const VideoSource&);
-
-        detail::FrameQueue* frameQueue_;
-        detail::VideoParser* videoParser_;
-    };
-
-private:
-    VideoReader_GPU(const VideoReader_GPU&);
-    VideoReader_GPU& operator =(const VideoReader_GPU&);
-
-    class Impl;
-    std::auto_ptr<Impl> impl_;
-};
-
 //! removes points (CV_32FC2, single row matrix) with zero mask value
 CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
 
diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp
index c69b9606c..59efd2e4a 100644
--- a/modules/gpu/perf/perf_video.cpp
+++ b/modules/gpu/perf/perf_video.cpp
@@ -1005,103 +1005,3 @@ PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG,
 }
 
 #endif
-
-//////////////////////////////////////////////////////
-// VideoReader
-
-#if defined(HAVE_NVCUVID) && BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-PERF_TEST_P(Video, DISABLED_Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
-{
-    declare.time(20);
-
-    const string inputFile = perf::TestBase::getDataPath(GetParam());
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::VideoReader_GPU d_reader(inputFile);
-        ASSERT_TRUE( d_reader.isOpened() );
-
-        cv::gpu::GpuMat frame;
-
-        TEST_CYCLE_N(10) d_reader.read(frame);
-
-        GPU_SANITY_CHECK(frame);
-    }
-    else
-    {
-        cv::VideoCapture reader(inputFile);
-        ASSERT_TRUE( reader.isOpened() );
-
-        cv::Mat frame;
-
-        TEST_CYCLE_N(10) reader >> frame;
-
-        CPU_SANITY_CHECK(frame);
-    }
-}
-
-#endif
-
-//////////////////////////////////////////////////////
-// VideoWriter
-
-#if defined(HAVE_NVCUVID) && defined(WIN32)
-
-PERF_TEST_P(Video, DISABLED_Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
-{
-    declare.time(30);
-
-    const string inputFile = perf::TestBase::getDataPath(GetParam());
-    const string outputFile = cv::tempfile(".avi");
-
-    const double FPS = 25.0;
-
-    cv::VideoCapture reader(inputFile);
-    ASSERT_TRUE( reader.isOpened() );
-
-    cv::Mat frame;
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::VideoWriter_GPU d_writer;
-
-        cv::gpu::GpuMat d_frame;
-
-        for (int i = 0; i < 10; ++i)
-        {
-            reader >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            d_frame.upload(frame);
-
-            if (!d_writer.isOpened())
-                d_writer.open(outputFile, frame.size(), FPS);
-
-            startTimer(); next();
-            d_writer.write(d_frame);
-            stopTimer();
-        }
-    }
-    else
-    {
-        cv::VideoWriter writer;
-
-        for (int i = 0; i < 10; ++i)
-        {
-            reader >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (!writer.isOpened())
-                writer.open(outputFile, CV_FOURCC('X', 'V', 'I', 'D'), FPS, frame.size());
-
-            startTimer(); next();
-            writer.write(frame);
-            stopTimer();
-        }
-    }
-
-    SANITY_CHECK(frame);
-}
-
-#endif
diff --git a/modules/gpu/src/cuda/NV12ToARGB.cu b/modules/gpu/src/cuda/NV12ToARGB.cu
deleted file mode 100644
index 09906613f..000000000
--- a/modules/gpu/src/cuda/NV12ToARGB.cu
+++ /dev/null
@@ -1,201 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-/*
- * NV12ToARGB color space conversion CUDA kernel
- *
- * This sample uses CUDA to perform a simple NV12 (YUV 4:2:0 planar)
- * source and converts to output in ARGB format
- */
-
-#if !defined CUDA_DISABLER
-
-#include "opencv2/core/cuda/common.hpp"
-
-namespace cv { namespace gpu { namespace cudev {
-    namespace video_decoding
-    {
-        __constant__ uint constAlpha = ((uint)0xff << 24);
-
-        __constant__ float constHueColorSpaceMat[9];
-
-        void loadHueCSC(float hueCSC[9])
-        {
-            cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
-        }
-
-        __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
-        {
-            float luma, chromaCb, chromaCr;
-
-            // Prepare for hue adjustment
-            luma     = (float)yuvi[0];
-            chromaCb = (float)((int)yuvi[1] - 512.0f);
-            chromaCr = (float)((int)yuvi[2] - 512.0f);
-
-           // Convert YUV To RGB with hue adjustment
-           *red   = (luma     * constHueColorSpaceMat[0]) +
-                    (chromaCb * constHueColorSpaceMat[1]) +
-                    (chromaCr * constHueColorSpaceMat[2]);
-
-           *green = (luma     * constHueColorSpaceMat[3]) +
-                    (chromaCb * constHueColorSpaceMat[4]) +
-                    (chromaCr * constHueColorSpaceMat[5]);
-
-           *blue  = (luma     * constHueColorSpaceMat[6]) +
-                    (chromaCb * constHueColorSpaceMat[7]) +
-                    (chromaCr * constHueColorSpaceMat[8]);
-        }
-
-        __device__ uint RGBAPACK_10bit(float red, float green, float blue, uint alpha)
-        {
-            uint ARGBpixel = 0;
-
-            // Clamp final 10 bit results
-            red   = ::fmin(::fmax(red,   0.0f), 1023.f);
-            green = ::fmin(::fmax(green, 0.0f), 1023.f);
-            blue  = ::fmin(::fmax(blue,  0.0f), 1023.f);
-
-            // Convert to 8 bit unsigned integers per color component
-            ARGBpixel = (((uint)blue  >> 2) |
-                        (((uint)green >> 2) << 8)  |
-                        (((uint)red   >> 2) << 16) |
-                        (uint)alpha);
-
-            return ARGBpixel;
-        }
-
-        // CUDA kernel for outputing the final ARGB output from NV12
-
-        #define COLOR_COMPONENT_BIT_SIZE 10
-        #define COLOR_COMPONENT_MASK     0x3FF
-
-        __global__ void NV12ToARGB(uchar* srcImage, size_t nSourcePitch,
-                                   uint* dstImage, size_t nDestPitch,
-                                   uint width, uint height)
-        {
-            // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
-            const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
-            const int y = blockIdx.y *  blockDim.y       +  threadIdx.y;
-
-            if (x >= width || y >= height)
-                return;
-
-            // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
-            // if we move to texture we could read 4 luminance values
-
-            uint yuv101010Pel[2];
-
-            yuv101010Pel[0] = (srcImage[y * nSourcePitch + x    ]) << 2;
-            yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2;
-
-            const size_t chromaOffset = nSourcePitch * height;
-
-            const int y_chroma = y >> 1;
-
-            if (y & 1)  // odd scanline ?
-            {
-                uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x    ];
-                uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];
-
-                if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
-                {
-                    chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x    ] + 1) >> 1;
-                    chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
-                }
-
-                yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
-                yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-
-                yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
-                yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-            }
-            else
-            {
-                yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
-                yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-
-                yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
-                yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-            }
-
-            // this steps performs the color conversion
-            uint yuvi[6];
-            float red[2], green[2], blue[2];
-
-            yuvi[0] =  (yuv101010Pel[0] &   COLOR_COMPONENT_MASK    );
-            yuvi[1] = ((yuv101010Pel[0] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
-            yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
-
-            yuvi[3] =  (yuv101010Pel[1] &   COLOR_COMPONENT_MASK    );
-            yuvi[4] = ((yuv101010Pel[1] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
-            yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
-
-            // YUV to RGB Transformation conversion
-            YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
-            YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);
-
-            // Clamp the results to RGBA
-
-            const size_t dstImagePitch = nDestPitch >> 2;
-
-            dstImage[y * dstImagePitch + x     ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha);
-            dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
-        }
-
-        void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));
-
-            NV12ToARGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
-                interopFrame.cols, interopFrame.rows);
-
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    }
-}}}
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/cuda/rgb_to_yv12.cu b/modules/gpu/src/cuda/rgb_to_yv12.cu
deleted file mode 100644
index 3e5664bab..000000000
--- a/modules/gpu/src/cuda/rgb_to_yv12.cu
+++ /dev/null
@@ -1,175 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined CUDA_DISABLER
-
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/vec_traits.hpp"
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace video_encoding
-    {
-        __device__ __forceinline__ void rgbtoy(const uchar b, const uchar g, const uchar r, uchar& y)
-        {
-            y = static_cast<uchar>(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100);
-        }
-
-        __device__ __forceinline__ void rgbtoyuv(const uchar b, const uchar g, const uchar r, uchar& y, uchar& u, uchar& v)
-        {
-            rgbtoy(b, g, r, y);
-            u = static_cast<uchar>(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100);
-            v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100);
-        }
-
-        __global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst)
-        {
-            const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
-            const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
-
-            if (x + 1 >= src.cols || y + 1 >= src.rows)
-                return;
-
-            // get pointers to the data
-            const size_t planeSize = src.rows * dst.step;
-            PtrStepb y_plane(dst.data, dst.step);
-            PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
-            PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
-
-            uchar pix;
-            uchar y_val, u_val, v_val;
-
-            pix = src(y, x);
-            rgbtoy(pix, pix, pix, y_val);
-            y_plane(y, x) = y_val;
-
-            pix = src(y, x + 1);
-            rgbtoy(pix, pix, pix, y_val);
-            y_plane(y, x + 1) = y_val;
-
-            pix = src(y + 1, x);
-            rgbtoy(pix, pix, pix, y_val);
-            y_plane(y + 1, x) = y_val;
-
-            pix = src(y + 1, x + 1);
-            rgbtoyuv(pix, pix, pix, y_val, u_val, v_val);
-            y_plane(y + 1, x + 1) = y_val;
-            u_plane(y / 2, x / 2) = u_val;
-            v_plane(y / 2, x / 2) = v_val;
-        }
-
-        template <typename T>
-        __global__ void BGR_to_YV12(const PtrStepSz<T> src, PtrStepb dst)
-        {
-            const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
-            const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
-
-            if (x + 1 >= src.cols || y + 1 >= src.rows)
-                return;
-
-            // get pointers to the data
-            const size_t planeSize = src.rows * dst.step;
-            PtrStepb y_plane(dst.data, dst.step);
-            PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
-            PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
-
-            T pix;
-            uchar y_val, u_val, v_val;
-
-            pix = src(y, x);
-            rgbtoy(pix.z, pix.y, pix.x, y_val);
-            y_plane(y, x) = y_val;
-
-            pix = src(y, x + 1);
-            rgbtoy(pix.z, pix.y, pix.x, y_val);
-            y_plane(y, x + 1) = y_val;
-
-            pix = src(y + 1, x);
-            rgbtoy(pix.z, pix.y, pix.x, y_val);
-            y_plane(y + 1, x) = y_val;
-
-            pix = src(y + 1, x + 1);
-            rgbtoyuv(pix.z, pix.y, pix.x, y_val, u_val, v_val);
-            y_plane(y + 1, x + 1) = y_val;
-            u_plane(y / 2, x / 2) = u_val;
-            v_plane(y / 2, x / 2) = v_val;
-        }
-
-        void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
-
-            Gray_to_YV12<<<grid, block>>>(src, dst);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-        template <int cn>
-        void BGR_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
-        {
-            typedef typename TypeVec<uchar, cn>::vec_type src_t;
-
-            dim3 block(32, 8);
-            dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
-
-            BGR_to_YV12<<<grid, block>>>(static_cast< PtrStepSz<src_t> >(src), dst);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst)
-        {
-            typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst);
-
-            static const func_t funcs[] =
-            {
-                0, Gray_to_YV12_caller, 0, BGR_to_YV12_caller<3>, BGR_to_YV12_caller<4>
-            };
-
-            funcs[cn](src, dst);
-        }
-    }
-}}}
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index cc25ab2f9..aacc43f99 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -80,20 +80,6 @@
         #include <cublas.h>
     #endif
 
-    #ifdef HAVE_NVCUVID
-        #include <nvcuvid.h>
-
-        #ifdef WIN32
-            #include <windows.h>
-            #undef small
-            #undef min
-            #undef max
-            #undef abs
-
-            #include <NVEncoderAPI.h>
-        #endif
-    #endif
-
     #include "internal_shared.hpp"
     #include "opencv2/core/stream_accessor.hpp"
 
diff --git a/modules/gpu/src/thread_wrappers.cpp b/modules/gpu/src/thread_wrappers.cpp
deleted file mode 100644
index e8ee19e54..000000000
--- a/modules/gpu/src/thread_wrappers.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "thread_wrappers.h"
-
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
-
-#ifdef WIN32
-    #define NOMINMAX
-    #include <windows.h>
-#else
-    #include <pthread.h>
-    #include <unistd.h>
-#endif
-
-#ifdef WIN32
-    class cv::gpu::detail::CriticalSection::Impl
-    {
-    public:
-        Impl()
-        {
-            InitializeCriticalSection(&criticalSection_);
-        }
-
-        ~Impl()
-        {
-            DeleteCriticalSection(&criticalSection_);
-        }
-
-        void enter()
-        {
-            EnterCriticalSection(&criticalSection_);
-        }
-
-        void leave()
-        {
-            LeaveCriticalSection(&criticalSection_);
-        }
-
-    private:
-        CRITICAL_SECTION criticalSection_;
-    };
-#else
-    class cv::gpu::detail::CriticalSection::Impl
-    {
-    public:
-        Impl()
-        {
-            pthread_mutexattr_t mutex_attribute;
-            pthread_mutexattr_init(&mutex_attribute);
-            pthread_mutexattr_settype(&mutex_attribute, PTHREAD_MUTEX_RECURSIVE);
-            pthread_mutex_init(&mutex_, 0);
-            pthread_mutexattr_destroy(&mutex_attribute);
-        }
-
-        ~Impl()
-        {
-            pthread_mutex_destroy(&mutex_);
-        }
-
-        void enter()
-        {
-            pthread_mutex_lock(&mutex_);
-        }
-
-        void leave()
-        {
-            pthread_mutex_unlock(&mutex_);
-        }
-
-    private:
-        pthread_mutex_t mutex_;
-    };
-#endif
-
-cv::gpu::detail::CriticalSection::CriticalSection() :
-    impl_(new Impl)
-{
-}
-
-cv::gpu::detail::CriticalSection::~CriticalSection()
-{
-}
-
-void cv::gpu::detail::CriticalSection::enter()
-{
-    impl_->enter();
-}
-
-void cv::gpu::detail::CriticalSection::leave()
-{
-    impl_->leave();
-}
-
-#ifdef WIN32
-    namespace
-    {
-        struct UserData
-        {
-            void (*func)(void* userData);
-            void* param;
-        };
-
-        DWORD WINAPI WinThreadFunction(LPVOID lpParam)
-        {
-            UserData* userData = static_cast<UserData*>(lpParam);
-
-            userData->func(userData->param);
-
-            return 0;
-        }
-    }
-
-    class cv::gpu::detail::Thread::Impl
-    {
-    public:
-        Impl(void (*func)(void* userData), void* userData)
-        {
-            userData_.func = func;
-            userData_.param = userData;
-
-            thread_ = CreateThread(
-                NULL,                   // default security attributes
-                0,                      // use default stack size
-                WinThreadFunction,      // thread function name
-                &userData_,             // argument to thread function
-                0,                      // use default creation flags
-                &threadId_);            // returns the thread identifier
-        }
-
-        ~Impl()
-        {
-            CloseHandle(thread_);
-        }
-
-        void wait()
-        {
-            WaitForSingleObject(thread_, INFINITE);
-        }
-
-    private:
-        UserData userData_;
-        HANDLE thread_;
-        DWORD threadId_;
-    };
-#else
-    namespace
-    {
-        struct UserData
-        {
-            void (*func)(void* userData);
-            void* param;
-        };
-
-        void* PThreadFunction(void* lpParam)
-        {
-            UserData* userData = static_cast<UserData*>(lpParam);
-
-            userData->func(userData->param);
-
-            return 0;
-        }
-    }
-
-    class cv::gpu::detail::Thread::Impl
-    {
-    public:
-        Impl(void (*func)(void* userData), void* userData)
-        {
-            userData_.func = func;
-            userData_.param = userData;
-
-            pthread_create(&thread_, NULL, PThreadFunction, &userData_);
-        }
-
-        ~Impl()
-        {
-            pthread_detach(thread_);
-        }
-
-        void wait()
-        {
-            pthread_join(thread_, NULL);
-        }
-
-    private:
-        pthread_t thread_;
-        UserData userData_;
-    };
-#endif
-
-cv::gpu::detail::Thread::Thread(void (*func)(void* userData), void* userData) :
-    impl_(new Impl(func, userData))
-{
-}
-
-cv::gpu::detail::Thread::~Thread()
-{
-}
-
-void cv::gpu::detail::Thread::wait()
-{
-    impl_->wait();
-}
-
-void cv::gpu::detail::Thread::sleep(int ms)
-{
-#ifdef WIN32
-    ::Sleep(ms);
-#else
-    ::usleep(ms * 1000);
-#endif
-}
-
-#endif // HAVE_CUDA
diff --git a/modules/gpu/src/video_decoder.h b/modules/gpu/src/video_decoder.h
deleted file mode 100644
index 0c8f8e08f..000000000
--- a/modules/gpu/src/video_decoder.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __VIDEO_DECODER_H__
-#define __VIDEO_DECODER_H__
-
-#include "precomp.hpp"
-
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
-
-namespace cv { namespace gpu
-{
-    namespace detail
-    {
-        class VideoDecoder
-        {
-        public:
-            VideoDecoder(const VideoReader_GPU::FormatInfo& videoFormat, CUvideoctxlock lock) : lock_(lock), decoder_(0)
-            {
-                create(videoFormat);
-            }
-
-            ~VideoDecoder()
-            {
-                release();
-            }
-
-            void create(const VideoReader_GPU::FormatInfo& videoFormat);
-            void release();
-
-            // Get the code-type currently used.
-            cudaVideoCodec codec() const { return createInfo_.CodecType; }
-            unsigned long maxDecodeSurfaces() const { return createInfo_.ulNumDecodeSurfaces; }
-
-            unsigned long frameWidth() const { return createInfo_.ulWidth; }
-            unsigned long frameHeight() const { return createInfo_.ulHeight; }
-
-            unsigned long targetWidth() const { return createInfo_.ulTargetWidth; }
-            unsigned long targetHeight() const { return createInfo_.ulTargetHeight; }
-
-            cudaVideoChromaFormat chromaFormat() const { return createInfo_.ChromaFormat; }
-
-            bool decodePicture(CUVIDPICPARAMS* picParams)
-            {
-                return cuvidDecodePicture(decoder_, picParams) == CUDA_SUCCESS;
-            }
-
-            cv::gpu::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams)
-            {
-                CUdeviceptr ptr;
-                unsigned int pitch;
-
-                cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) );
-
-                return GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch);
-            }
-
-            void unmapFrame(cv::gpu::GpuMat& frame)
-            {
-                cuSafeCall( cuvidUnmapVideoFrame(decoder_, (CUdeviceptr) frame.data) );
-                frame.release();
-            }
-
-        private:
-            VideoDecoder(const VideoDecoder&);
-            VideoDecoder& operator =(const VideoDecoder&);
-
-            CUvideoctxlock lock_;
-            CUVIDDECODECREATEINFO createInfo_;
-            CUvideodecoder        decoder_;
-        };
-    }
-}}
-
-#endif // HAVE_CUDA
-
-#endif // __VIDEO_DECODER_H__
diff --git a/modules/gpucodec/CMakeLists.txt b/modules/gpucodec/CMakeLists.txt
new file mode 100644
index 000000000..f03c201b3
--- /dev/null
+++ b/modules/gpucodec/CMakeLists.txt
@@ -0,0 +1,29 @@
+if(ANDROID OR IOS)  # the gpucodec module is disabled when targeting Android or iOS
+  ocv_module_disable(gpucodec)
+endif()
+
+set(the_description "GPU-accelerated Video Encoding/Decoding")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)  # turns off the listed MSVC-style (/wd) and GCC-style (-W) warnings for CMAKE_CXX_FLAGS
+
+ocv_add_module(gpucodec opencv_highgui)  # declares the module; opencv_highgui is presumably its required dependency - confirm against ocv_add_module
+
+ocv_module_include_directories()
+ocv_glob_module_sources()
+
+set(extra_libs ${HIGHGUI_LIBRARIES})  # extra link list starts from the highgui libraries
+
+if(HAVE_NVCUVID)  # NVCUVID available: add the nvcuvid library to the link list
+  list(APPEND extra_libs ${CUDA_nvcuvid_LIBRARY})
+
+  if(WIN32)  # the nvcuvenc library is appended on Windows only
+    list(APPEND extra_libs ${CUDA_nvcuvenc_LIBRARY})
+  endif()
+endif()
+
+ocv_create_module(${extra_libs})  # creates the module, passing the extra libraries collected above
+
+ocv_add_precompiled_headers(${the_module})
+
+ocv_add_accuracy_tests()
+ocv_add_perf_tests()
diff --git a/modules/gpucodec/doc/gpucodec.rst b/modules/gpucodec/doc/gpucodec.rst
new file mode 100644
index 000000000..b9f763f84
--- /dev/null
+++ b/modules/gpucodec/doc/gpucodec.rst
@@ -0,0 +1,9 @@
+*************************************************
+gpucodec. GPU-accelerated Video Encoding/Decoding
+*************************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    videodec
+    videoenc
diff --git a/modules/gpucodec/doc/videodec.rst b/modules/gpucodec/doc/videodec.rst
new file mode 100644
index 000000000..342203223
--- /dev/null
+++ b/modules/gpucodec/doc/videodec.rst
@@ -0,0 +1,234 @@
+Video Decoding
+==============
+
+.. highlight:: cpp
+
+
+
+gpu::VideoReader_GPU
+--------------------
+Video reader class.
+
+.. ocv:class:: gpu::VideoReader_GPU
+
+
+
+gpu::VideoReader_GPU::Codec
+---------------------------
+
+Video codecs supported by :ocv:class:`gpu::VideoReader_GPU` .
+
+.. ocv:enum:: gpu::VideoReader_GPU::Codec
+
+  .. ocv:emember:: MPEG1 = 0
+  .. ocv:emember:: MPEG2
+  .. ocv:emember:: MPEG4
+  .. ocv:emember:: VC1
+  .. ocv:emember:: H264
+  .. ocv:emember:: JPEG
+  .. ocv:emember:: H264_SVC
+  .. ocv:emember:: H264_MVC
+
+  .. ocv:emember:: Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V'))
+
+        Y,U,V (4:2:0)
+
+  .. ocv:emember:: Uncompressed_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2'))
+
+        Y,V,U (4:2:0)
+
+  .. ocv:emember:: Uncompressed_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2'))
+
+        Y,UV  (4:2:0)
+
+  .. ocv:emember:: Uncompressed_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V'))
+
+        YUYV/YUY2 (4:2:2)
+
+  .. ocv:emember:: Uncompressed_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))
+
+        UYVY (4:2:2)
+
+
+gpu::VideoReader_GPU::ChromaFormat
+----------------------------------
+
+Chroma formats supported by :ocv:class:`gpu::VideoReader_GPU` .
+
+.. ocv:enum:: gpu::VideoReader_GPU::ChromaFormat
+
+  .. ocv:emember:: Monochrome = 0
+  .. ocv:emember:: YUV420
+  .. ocv:emember:: YUV422
+  .. ocv:emember:: YUV444
+
+
+gpu::VideoReader_GPU::FormatInfo
+--------------------------------
+.. ocv:struct:: gpu::VideoReader_GPU::FormatInfo
+
+Struct providing information about video file format. ::
+
+    struct FormatInfo
+    {
+        Codec codec;
+        ChromaFormat chromaFormat;
+        int width;
+        int height;
+    };
+
+
+gpu::VideoReader_GPU::VideoReader_GPU
+-------------------------------------
+Constructors.
+
+.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU()
+.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const String& filename)
+.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const cv::Ptr<VideoSource>& source)
+
+    :param filename: Name of the input video file.
+
+    :param source: Video file parser implemented by user.
+
+The constructors initialize the video reader. FFMPEG is used to read videos. Users can implement their own demultiplexing with :ocv:class:`gpu::VideoReader_GPU::VideoSource` .
+
+
+
+gpu::VideoReader_GPU::open
+--------------------------
+Initializes or reinitializes video reader.
+
+.. ocv:function:: void gpu::VideoReader_GPU::open(const String& filename)
+.. ocv:function:: void gpu::VideoReader_GPU::open(const cv::Ptr<VideoSource>& source)
+
+The method opens video reader. Parameters are the same as in the constructor :ocv:func:`gpu::VideoReader_GPU::VideoReader_GPU` . The method throws :ocv:class:`Exception` if error occurs.
+
+
+
+gpu::VideoReader_GPU::isOpened
+------------------------------
+Returns true if video reader has been successfully initialized.
+
+.. ocv:function:: bool gpu::VideoReader_GPU::isOpened() const
+
+
+
+gpu::VideoReader_GPU::close
+---------------------------
+Releases the video reader.
+
+.. ocv:function:: void gpu::VideoReader_GPU::close()
+
+
+
+gpu::VideoReader_GPU::read
+--------------------------
+Grabs, decodes and returns the next video frame.
+
+.. ocv:function:: bool gpu::VideoReader_GPU::read(GpuMat& image)
+
+If no frame has been grabbed (there are no more frames in the video file), the method returns ``false`` . The method throws :ocv:class:`Exception` if an error occurs.
+
+
+
+gpu::VideoReader_GPU::format
+----------------------------
+Returns information about video file format.
+
+.. ocv:function:: FormatInfo gpu::VideoReader_GPU::format() const
+
+The method throws :ocv:class:`Exception` if video reader wasn't initialized.
+
+
+
+gpu::VideoReader_GPU::dumpFormat
+--------------------------------
+Dump information about video file format to specified stream.
+
+.. ocv:function:: void gpu::VideoReader_GPU::dumpFormat(std::ostream& st)
+
+    :param st: Output stream.
+
+The method throws :ocv:class:`Exception` if video reader wasn't initialized.
+
+
+
+gpu::VideoReader_GPU::VideoSource
+-----------------------------------
+.. ocv:class:: gpu::VideoReader_GPU::VideoSource
+
+Interface for video demultiplexing. ::
+
+    class VideoSource
+    {
+    public:
+        VideoSource();
+        virtual ~VideoSource() {}
+
+        virtual FormatInfo format() const = 0;
+        virtual void start() = 0;
+        virtual void stop() = 0;
+        virtual bool isStarted() const = 0;
+        virtual bool hasError() const = 0;
+
+    protected:
+        bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream = false);
+    };
+
+Users can implement their own demultiplexing by implementing this interface.
+
+
+
+gpu::VideoReader_GPU::VideoSource::format
+-----------------------------------------
+Returns information about video file format.
+
+.. ocv:function:: virtual FormatInfo gpu::VideoReader_GPU::VideoSource::format() const = 0
+
+
+
+gpu::VideoReader_GPU::VideoSource::start
+----------------------------------------
+Starts processing.
+
+.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::start() = 0
+
+The implementation must create its own thread for video processing and periodically call :ocv:func:`gpu::VideoReader_GPU::VideoSource::parseVideoData` .
+
+
+
+gpu::VideoReader_GPU::VideoSource::stop
+---------------------------------------
+Stops processing.
+
+.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::stop() = 0
+
+
+
+gpu::VideoReader_GPU::VideoSource::isStarted
+--------------------------------------------
+Returns ``true`` if processing was successfully started.
+
+.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::isStarted() const = 0
+
+
+
+gpu::VideoReader_GPU::VideoSource::hasError
+-------------------------------------------
+Returns ``true`` if an error occurred during processing.
+
+.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::hasError() const = 0
+
+
+
+gpu::VideoReader_GPU::VideoSource::parseVideoData
+-------------------------------------------------
+Parses the next video frame. The implementation must call this method after a new frame has been grabbed.
+
+.. ocv:function:: bool gpu::VideoReader_GPU::VideoSource::parseVideoData(const uchar* data, size_t size, bool endOfStream = false)
+
+    :param data: Pointer to frame data. Can be ``NULL`` if ``endOfStream`` is ``true`` .
+
+    :param size: Size in bytes of current frame.
+
+    :param endOfStream: Indicates that it is end of stream.
diff --git a/modules/gpucodec/doc/videoenc.rst b/modules/gpucodec/doc/videoenc.rst
new file mode 100644
index 000000000..ec26e27ef
--- /dev/null
+++ b/modules/gpucodec/doc/videoenc.rst
@@ -0,0 +1,219 @@
+Video Encoding
+==============
+
+.. highlight:: cpp
+
+
+
+gpu::VideoWriter_GPU
+---------------------
+Video writer class.
+
+.. ocv:class:: gpu::VideoWriter_GPU
+
+The class uses H264 video codec.
+
+.. note:: Currently only Windows platform is supported.
+
+
+
+gpu::VideoWriter_GPU::VideoWriter_GPU
+-------------------------------------
+Constructors.
+
+.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU()
+.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR)
+.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR)
+.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR)
+.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR)
+
+    :param fileName: Name of the output video file. Only AVI file format is supported.
+
+    :param frameSize: Size of the input video frames.
+
+    :param fps: Framerate of the created video stream.
+
+    :param params: Encoder parameters. See :ocv:struct:`gpu::VideoWriter_GPU::EncoderParams` .
+
+    :param format: Surface format of input frames ( ``SF_UYVY`` , ``SF_YUY2`` , ``SF_YV12`` , ``SF_NV12`` , ``SF_IYUV`` , ``SF_BGR`` or ``SF_GRAY``). BGR or gray frames will be converted to YV12 format before encoding, frames with other formats will be used as is.
+
+    :param encoderCallback: Callbacks for video encoder. See :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` . Use it if you want to work with raw video stream.
+
+The constructors initialize the video writer. FFMPEG is used to write videos. Users can implement their own multiplexing with :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` .
+
+
+
+gpu::VideoWriter_GPU::open
+--------------------------
+Initializes or reinitializes video writer.
+
+.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR)
+.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR)
+.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR)
+.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR)
+
+The method opens video writer. Parameters are the same as in the constructor :ocv:func:`gpu::VideoWriter_GPU::VideoWriter_GPU` . The method throws :ocv:class:`Exception` if error occurs.
+
+
+
+gpu::VideoWriter_GPU::isOpened
+------------------------------
+Returns true if video writer has been successfully initialized.
+
+.. ocv:function:: bool gpu::VideoWriter_GPU::isOpened() const
+
+
+
+gpu::VideoWriter_GPU::close
+---------------------------
+Releases the video writer.
+
+.. ocv:function:: void gpu::VideoWriter_GPU::close()
+
+
+
+gpu::VideoWriter_GPU::write
+---------------------------
+Writes the next video frame.
+
+.. ocv:function:: void gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame = false)
+
+    :param image: The written frame.
+
+    :param lastFrame: Indicates that it is end of stream. The parameter can be ignored.
+
+The method writes the specified image to the video file. The image must have the same size and the same surface format as specified when opening the video writer.
+
+
+
+gpu::VideoWriter_GPU::EncoderParams
+-----------------------------------
+.. ocv:struct:: gpu::VideoWriter_GPU::EncoderParams
+
+Different parameters for CUDA video encoder. ::
+
+    struct EncoderParams
+    {
+        int       P_Interval;      //    NVVE_P_INTERVAL,
+        int       IDR_Period;      //    NVVE_IDR_PERIOD,
+        int       DynamicGOP;      //    NVVE_DYNAMIC_GOP,
+        int       RCType;          //    NVVE_RC_TYPE,
+        int       AvgBitrate;      //    NVVE_AVG_BITRATE,
+        int       PeakBitrate;     //    NVVE_PEAK_BITRATE,
+        int       QP_Level_Intra;  //    NVVE_QP_LEVEL_INTRA,
+        int       QP_Level_InterP; //    NVVE_QP_LEVEL_INTER_P,
+        int       QP_Level_InterB; //    NVVE_QP_LEVEL_INTER_B,
+        int       DeblockMode;     //    NVVE_DEBLOCK_MODE,
+        int       ProfileLevel;    //    NVVE_PROFILE_LEVEL,
+        int       ForceIntra;      //    NVVE_FORCE_INTRA,
+        int       ForceIDR;        //    NVVE_FORCE_IDR,
+        int       ClearStat;       //    NVVE_CLEAR_STAT,
+        int       DIMode;          //    NVVE_SET_DEINTERLACE,
+        int       Presets;         //    NVVE_PRESETS,
+        int       DisableCabac;    //    NVVE_DISABLE_CABAC,
+        int       NaluFramingType; //    NVVE_CONFIGURE_NALU_FRAMING_TYPE
+        int       DisableSPSPPS;   //    NVVE_DISABLE_SPS_PPS
+
+        EncoderParams();
+        explicit EncoderParams(const String& configFile);
+
+        void load(const String& configFile);
+        void save(const String& configFile) const;
+    };
+
+
+
+gpu::VideoWriter_GPU::EncoderParams::EncoderParams
+--------------------------------------------------
+Constructors.
+
+.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams()
+.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const String& configFile)
+
+    :param configFile: Config file name.
+
+Creates default parameters or reads parameters from config file.
+
+
+
+gpu::VideoWriter_GPU::EncoderParams::load
+-----------------------------------------
+Reads parameters from config file.
+
+.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::load(const String& configFile)
+
+    :param configFile: Config file name.
+
+
+
+gpu::VideoWriter_GPU::EncoderParams::save
+-----------------------------------------
+Saves parameters to config file.
+
+.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::save(const String& configFile) const
+
+    :param configFile: Config file name.
+
+
+
+gpu::VideoWriter_GPU::EncoderCallBack
+-------------------------------------
+.. ocv:class:: gpu::VideoWriter_GPU::EncoderCallBack
+
+Callbacks for CUDA video encoder. ::
+
+    class EncoderCallBack
+    {
+    public:
+        enum PicType
+        {
+            IFRAME = 1,
+            PFRAME = 2,
+            BFRAME = 3
+        };
+
+        virtual ~EncoderCallBack() {}
+
+        virtual unsigned char* acquireBitStream(int* bufferSize) = 0;
+        virtual void releaseBitStream(unsigned char* data, int size) = 0;
+        virtual void onBeginFrame(int frameNumber, PicType picType) = 0;
+        virtual void onEndFrame(int frameNumber, PicType picType) = 0;
+    };
+
+
+
+gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream
+-------------------------------------------------------
+Callback function to signal the start of bitstream that is to be encoded.
+
+.. ocv:function:: virtual uchar* gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream(int* bufferSize) = 0
+
+The callback must allocate a buffer for the CUDA encoder and return a pointer to it and its size.
+
+
+
+gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream
+-------------------------------------------------------
+Callback function to signal that the encoded bitstream is ready to be written to file.
+
+.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream(unsigned char* data, int size) = 0
+
+
+
+gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame
+---------------------------------------------------
+Callback function to signal that the encoding operation on the frame has started.
+
+.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame(int frameNumber, PicType picType) = 0
+
+    :param picType: Specify frame type (I-Frame, P-Frame or B-Frame).
+
+
+
+gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame
+-------------------------------------------------
+Callback function signals that the encoding operation on the frame has finished.
+
+.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame(int frameNumber, PicType picType) = 0
+
+    :param picType: Specify frame type (I-Frame, P-Frame or B-Frame).
diff --git a/modules/gpucodec/include/opencv2/gpucodec.hpp b/modules/gpucodec/include/opencv2/gpucodec.hpp
new file mode 100644
index 000000000..ac9c40047
--- /dev/null
+++ b/modules/gpucodec/include/opencv2/gpucodec.hpp
@@ -0,0 +1,265 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUCODEC_HPP__
+#define __OPENCV_GPUCODEC_HPP__
+
+#ifndef __cplusplus
+#  error gpucodec.hpp header must be compiled as C++
+#endif
+
+#include <iosfwd>
+
+#include "opencv2/core/gpumat.hpp"
+
+namespace cv { namespace gpu {
+
+////////////////////////////////// Video Encoding //////////////////////////////////
+
+// Works only under Windows
+// Supports only the H264 video codec and AVI files
+class CV_EXPORTS VideoWriter_GPU
+{
+public:
+    struct EncoderParams;  // encoder settings, defined further down in this class
+
+    // Callbacks for the video encoder; use them if you want to work with the raw video stream
+    class EncoderCallBack;
+    // Surface format of the input frames; SF_GRAY has the same value as SF_BGR
+    enum SurfaceFormat
+    {
+        SF_UYVY = 0,
+        SF_YUY2,
+        SF_YV12,
+        SF_NV12,
+        SF_IYUV,
+        SF_BGR,
+        SF_GRAY = SF_BGR
+    };
+
+    VideoWriter_GPU();
+    VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
+    VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+    VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
+    VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+    ~VideoWriter_GPU();
+
+    // all methods throw cv::Exception if an error occurs
+    void open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
+    void open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+    void open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
+    void open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+
+    bool isOpened() const;
+    void close();
+    // writes the next frame; lastFrame indicates the end of the stream
+    void write(const cv::gpu::GpuMat& image, bool lastFrame = false);
+    // Encoder parameters; each trailing comment names the NVVE_* identifier the field presumably maps to
+    struct CV_EXPORTS EncoderParams
+    {
+        int       P_Interval;      //    NVVE_P_INTERVAL,
+        int       IDR_Period;      //    NVVE_IDR_PERIOD,
+        int       DynamicGOP;      //    NVVE_DYNAMIC_GOP,
+        int       RCType;          //    NVVE_RC_TYPE,
+        int       AvgBitrate;      //    NVVE_AVG_BITRATE,
+        int       PeakBitrate;     //    NVVE_PEAK_BITRATE,
+        int       QP_Level_Intra;  //    NVVE_QP_LEVEL_INTRA,
+        int       QP_Level_InterP; //    NVVE_QP_LEVEL_INTER_P,
+        int       QP_Level_InterB; //    NVVE_QP_LEVEL_INTER_B,
+        int       DeblockMode;     //    NVVE_DEBLOCK_MODE,
+        int       ProfileLevel;    //    NVVE_PROFILE_LEVEL,
+        int       ForceIntra;      //    NVVE_FORCE_INTRA,
+        int       ForceIDR;        //    NVVE_FORCE_IDR,
+        int       ClearStat;       //    NVVE_CLEAR_STAT,
+        int       DIMode;          //    NVVE_SET_DEINTERLACE,
+        int       Presets;         //    NVVE_PRESETS,
+        int       DisableCabac;    //    NVVE_DISABLE_CABAC,
+        int       NaluFramingType; //    NVVE_CONFIGURE_NALU_FRAMING_TYPE,
+        int       DisableSPSPPS;   //    NVVE_DISABLE_SPS_PPS
+
+        EncoderParams();
+        explicit EncoderParams(const String& configFile);
+        // read the parameters from / save them to a config file
+        void load(const String& configFile);
+        void save(const String& configFile) const;
+    };
+
+    EncoderParams getParams() const;
+    // Callbacks for the video encoder: bitstream buffer hand-over and per-frame notifications
+    class CV_EXPORTS EncoderCallBack
+    {
+    public:
+        enum PicType
+        {
+            IFRAME = 1,
+            PFRAME = 2,
+            BFRAME = 3
+        };
+
+        virtual ~EncoderCallBack() {}
+
+        // callback function to signal the start of bitstream that is to be encoded
+        // must return pointer to buffer
+        virtual uchar* acquireBitStream(int* bufferSize) = 0;
+
+        // callback function to signal that the encoded bitstream is ready to be written to file
+        virtual void releaseBitStream(unsigned char* data, int size) = 0;
+
+        // callback function to signal that the encoding operation on the frame has started
+        virtual void onBeginFrame(int frameNumber, PicType picType) = 0;
+
+        // callback function signals that the encoding operation on the frame has finished
+        virtual void onEndFrame(int frameNumber, PicType picType) = 0;
+    };
+    // Implementation class; only forward-declared in this header
+    class Impl;
+
+private:
+    cv::Ptr<Impl> impl_;
+};
+
+////////////////////////////////// Video Decoding //////////////////////////////////////////
+
+namespace detail  // helper types used through pointers by VideoReader_GPU::VideoSource; only forward-declared here
+{
+    class FrameQueue;
+    class VideoParser;
+}
+
+class CV_EXPORTS VideoReader_GPU  // video reader class (video decoding)
+{
+public:
+    enum Codec  // video codecs supported by the reader
+    {
+        MPEG1 = 0,
+        MPEG2,
+        MPEG4,
+        VC1,
+        H264,
+        JPEG,
+        H264_SVC,
+        H264_MVC,
+        // uncompressed formats: each value packs a four-character code into one integer
+        Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   // Y,U,V (4:2:0)
+        Uncompressed_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   // Y,V,U (4:2:0)
+        Uncompressed_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   // Y,UV  (4:2:0)
+        Uncompressed_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   // YUYV/YUY2 (4:2:2)
+        Uncompressed_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')),   // UYVY (4:2:2)
+    };
+
+    enum ChromaFormat  // chroma formats supported by the reader
+    {
+        Monochrome=0,
+        YUV420,
+        YUV422,
+        YUV444,
+    };
+
+    struct FormatInfo  // information about the video file format
+    {
+        Codec codec;
+        ChromaFormat chromaFormat;
+        int width;
+        int height;
+    };
+
+    class VideoSource;
+    // construct from a file name or from a user-supplied VideoSource
+    VideoReader_GPU();
+    explicit VideoReader_GPU(const String& filename);
+    explicit VideoReader_GPU(const cv::Ptr<VideoSource>& source);
+
+    ~VideoReader_GPU();
+
+    void open(const String& filename);
+    void open(const cv::Ptr<VideoSource>& source);
+    bool isOpened() const;
+
+    void close();
+    // returns false if no frame has been grabbed (no more frames in the video file)
+    bool read(GpuMat& image);
+
+    FormatInfo format() const;
+    void dumpFormat(std::ostream& st);
+    // Interface for video demultiplexing; users can implement it to provide their own demultiplexing
+    class CV_EXPORTS VideoSource
+    {
+    public:
+        VideoSource() : frameQueue_(0), videoParser_(0) {}
+        virtual ~VideoSource() {}
+
+        virtual FormatInfo format() const = 0;
+        virtual void start() = 0;
+        virtual void stop() = 0;
+        virtual bool isStarted() const = 0;
+        virtual bool hasError() const = 0;
+        // store the queue / parser pointers in the private members below
+        void setFrameQueue(detail::FrameQueue* frameQueue) { frameQueue_ = frameQueue; }
+        void setVideoParser(detail::VideoParser* videoParser) { videoParser_ = videoParser; }
+
+    protected:  // implementations call parseVideoData() after a new frame has been grabbed
+        bool parseVideoData(const uchar* data, size_t size, bool endOfStream = false);
+
+    private:  // copy operations are declared private and have no body in this header
+        VideoSource(const VideoSource&);
+        VideoSource& operator =(const VideoSource&);
+
+        detail::FrameQueue* frameQueue_;
+        detail::VideoParser* videoParser_;
+    };
+    // Implementation class; only forward-declared in this header
+    class Impl;
+
+private:
+    cv::Ptr<Impl> impl_;
+};
+
+}} // namespace cv { namespace gpu {
+
+namespace cv {
+// Explicit specializations of Ptr<>::delete_obj() for the two Impl classes; declared here, not defined.
+template <> CV_EXPORTS void Ptr<cv::gpu::VideoWriter_GPU::Impl>::delete_obj();
+template <> CV_EXPORTS void Ptr<cv::gpu::VideoReader_GPU::Impl>::delete_obj();
+
+}
+
+#endif /* __OPENCV_GPUCODEC_HPP__ */
diff --git a/modules/gpucodec/perf/perf_main.cpp b/modules/gpucodec/perf/perf_main.cpp
new file mode 100644
index 000000000..2f4110b87
--- /dev/null
+++ b/modules/gpucodec/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+// Presumably expands to the perf-test entry point for gpucodec, with printCudaInfo() as an extra macro argument -- confirm in ts.
+CV_PERF_TEST_MAIN(gpucodec, printCudaInfo())
diff --git a/modules/gpucodec/perf/perf_precomp.cpp b/modules/gpucodec/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpucodec/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpucodec/perf/perf_precomp.hpp b/modules/gpucodec/perf/perf_precomp.hpp
new file mode 100644
index 000000000..421fa5763
--- /dev/null
+++ b/modules/gpucodec/perf/perf_precomp.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__  // silence selected diagnostics in translation units that include this header
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpucodec.hpp"
+#include "opencv2/highgui.hpp"
+// Fail the build if GTEST_CREATE_SHARED_LIBRARY is defined while compiling this module.
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpucodec/perf/perf_video.cpp b/modules/gpucodec/perf/perf_video.cpp
new file mode 100644
index 000000000..8f5e1700e
--- /dev/null
+++ b/modules/gpucodec/perf/perf_video.cpp
@@ -0,0 +1,162 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+#include "opencv2/highgui/highgui_c.h"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+#if defined(HAVE_XINE)         || \
+    defined(HAVE_GSTREAMER)    || \
+    defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_AVFOUNDATION) || \
+    defined(HAVE_FFMPEG)       || \
+    defined(WIN32) /* assume that we have ffmpeg */
+
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
+#else
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
+#endif
+
+DEF_PARAM_TEST_1(FileName, string);
+
+//////////////////////////////////////////////////////
+// VideoReader
+
+#if defined(HAVE_NVCUVID) && BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+PERF_TEST_P(FileName, VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
+{
+    // Times frame reads from each clip: VideoReader_GPU (GPU run) versus cv::VideoCapture (CPU run).
+    declare.time(20);
+    const string inputFile = perf::TestBase::getDataPath(GetParam());
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::VideoReader_GPU d_reader(inputFile);
+        ASSERT_TRUE( d_reader.isOpened() );
+
+        cv::gpu::GpuMat frame;
+        // Statement handed to TEST_CYCLE_N(10): a single read() into 'frame'.
+        TEST_CYCLE_N(10) d_reader.read(frame);
+        // The sanity check sees whatever the last read() left in 'frame'.
+        GPU_SANITY_CHECK(frame);
+    }
+    else
+    {
+        cv::VideoCapture reader(inputFile);
+        ASSERT_TRUE( reader.isOpened() );
+
+        cv::Mat frame;
+        // Statement handed to TEST_CYCLE_N(10): a single frame extraction into 'frame'.
+        TEST_CYCLE_N(10) reader >> frame;
+        // The sanity check sees whatever the last extraction left in 'frame'.
+        CPU_SANITY_CHECK(frame);
+    }
+}
+
+#endif
+
+//////////////////////////////////////////////////////
+// VideoWriter
+
+#if defined(HAVE_NVCUVID) && defined(WIN32)
+
+PERF_TEST_P(FileName, VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
+{
+    declare.time(30);
+    // Times write() of VideoWriter_GPU (GPU run) or cv::VideoWriter (CPU run) over 10 frames of each clip.
+    const string inputFile = perf::TestBase::getDataPath(GetParam());
+    const string outputFile = cv::tempfile(".avi");
+
+    const double FPS = 25.0;
+
+    cv::VideoCapture reader(inputFile);
+    ASSERT_TRUE( reader.isOpened() );
+
+    cv::Mat frame;
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::VideoWriter_GPU d_writer;
+
+        cv::gpu::GpuMat d_frame;
+
+        for (int i = 0; i < 10; ++i)
+        {
+            reader >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            d_frame.upload(frame);
+            // The writer is opened lazily, once the size of the first frame is known.
+            if (!d_writer.isOpened())
+                d_writer.open(outputFile, frame.size(), FPS);
+            // Only write() sits between startTimer() and stopTimer(); decode, upload and open() are outside.
+            startTimer(); next();
+            d_writer.write(d_frame);
+            stopTimer();
+        }
+    }
+    else
+    {
+        cv::VideoWriter writer;
+
+        for (int i = 0; i < 10; ++i)
+        {
+            reader >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            if (!writer.isOpened())
+                writer.open(outputFile, CV_FOURCC('X', 'V', 'I', 'D'), FPS, frame.size());
+            // Only write() sits between startTimer() and stopTimer(); decode and open() are outside.
+            startTimer(); next();
+            writer.write(frame);
+            stopTimer();
+        }
+    }
+    // NOTE(review): outputFile is never removed here, and 'frame' is the last *input* frame in both branches.
+    SANITY_CHECK(frame);
+}
+
+#endif
diff --git a/modules/gpucodec/src/cuda/nv12_to_rgb.cu b/modules/gpucodec/src/cuda/nv12_to_rgb.cu
new file mode 100644
index 000000000..536ba2715
--- /dev/null
+++ b/modules/gpucodec/src/cuda/nv12_to_rgb.cu
@@ -0,0 +1,193 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+ * NV12ToARGB color space conversion CUDA kernel
+ *
+ * This sample uses CUDA to read a simple NV12 (YUV 4:2:0 planar)
+ * source and convert it to output in ARGB format
+ */
+
+#include "opencv2/core/cuda/common.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    __constant__ float constHueColorSpaceMat[9]; // 3x3 matrix; YUV2RGB uses entries 0-2 for red, 3-5 for green, 6-8 for blue
+    // Copies the nine coefficients in hueCSC into constHueColorSpaceMat.
+    void loadHueCSC(float hueCSC[9])
+    {
+        cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, sizeof(constHueColorSpaceMat)) );
+    }
+
+    // Converts one Y/Cb/Cr triple (yuvi[0], yuvi[1], yuvi[2]) to float RGB by multiplying it
+    // with constHueColorSpaceMat. The kernel in this file passes 10-bit samples, which is why
+    // the chroma values are centred by subtracting 512.
+    __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
+    {
+        // Luma is used as-is; the chroma samples become signed offsets around zero.
+        const float y  = (float)yuvi[0];
+        const float cb = (float)((int)yuvi[1] - 512.0f);
+        const float cr = (float)((int)yuvi[2] - 512.0f);
+
+        // Each output channel is the dot product of (y, cb, cr) with three matrix entries.
+        const float r = (y  * constHueColorSpaceMat[0]) + (cb * constHueColorSpaceMat[1]) +
+                        (cr * constHueColorSpaceMat[2]);
+        const float g = (y  * constHueColorSpaceMat[3]) + (cb * constHueColorSpaceMat[4]) +
+                        (cr * constHueColorSpaceMat[5]);
+        const float b = (y  * constHueColorSpaceMat[6]) + (cb * constHueColorSpaceMat[7]) +
+                        (cr * constHueColorSpaceMat[8]);
+
+        *red   = r;
+        *green = g;
+        *blue  = b;
+    }
+
+    // Packs float RGB on a 0..1023 scale into one 32-bit pixel: blue in bits 0-7, green in
+    // bits 8-15, red in bits 16-23. 'alpha' is OR-ed in as given, so callers pass it pre-shifted.
+    __device__ uint RGBA_pack_10bit(float red, float green, float blue, uint alpha)
+    {
+        // Saturate every channel to the 10-bit range before converting to integer.
+        red   = ::fmin(::fmax(red,   0.0f), 1023.f);
+        green = ::fmin(::fmax(green, 0.0f), 1023.f);
+        blue  = ::fmin(::fmax(blue,  0.0f), 1023.f);
+
+        // Dropping the two low bits turns each 10-bit channel into an 8-bit one.
+        const uint r8 = (uint)red   >> 2;
+        const uint g8 = (uint)green >> 2;
+        const uint b8 = (uint)blue  >> 2;
+
+        // Assemble the pixel from the three byte lanes plus the caller's alpha bits.
+        return b8 | (g8 << 8) | (r8 << 16) | (uint)alpha;
+    }
+
+    // CUDA kernel that converts an NV12 frame to packed 32-bit ARGB. Each thread handles two
+    // horizontally adjacent pixels, which share one Cb/Cr pair read from the chroma plane.
+    #define COLOR_COMPONENT_BIT_SIZE 10
+    #define COLOR_COMPONENT_MASK     0x3FF
+
+    __global__ void NV12_to_RGB(uchar* srcImage, size_t nSourcePitch,
+                                uint* dstImage, size_t nDestPitch,
+                                uint width, uint height)
+    {
+        // x is the even column of this thread's two-pixel pair (hence the << 1); y is the row.
+        const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
+        const int y = blockIdx.y *  blockDim.y       +  threadIdx.y;
+
+        if (x >= width || y >= height)
+            return;
+        // NOTE(review): column x + 1 is used below without its own bounds check - confirm widths are even.
+        // Read 2 luma components at a time, since one CbCr pair is shared by both pixels (<< 2 widens 8 bits to 10).
+        // If we move to texture reads we could fetch 4 luminance values at once.
+
+        uint yuv101010Pel[2];
+
+        yuv101010Pel[0] = (srcImage[y * nSourcePitch + x    ]) << 2;
+        yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2;
+        // The chroma plane starts right after 'height' rows of luma.
+        const size_t chromaOffset = nSourcePitch * height;
+        // Two luma rows map to one chroma row.
+        const int y_chroma = y >> 1;
+
+        if (y & 1)  // odd scanline ?
+        {
+            uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x    ];
+            uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];
+
+            if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
+            {
+                chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x    ] + 1) >> 1;
+                chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
+            }
+
+            yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
+            yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+
+            yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
+            yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+        }
+        else
+        {
+            yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
+            yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+
+            yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
+            yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+        }
+
+        // This step unpacks the three 10-bit fields of each pixel and performs the color conversion.
+        uint yuvi[6];
+        float red[2], green[2], blue[2];
+
+        yuvi[0] =  (yuv101010Pel[0] &   COLOR_COMPONENT_MASK    );
+        yuvi[1] = ((yuv101010Pel[0] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
+        yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
+
+        yuvi[3] =  (yuv101010Pel[1] &   COLOR_COMPONENT_MASK    );
+        yuvi[4] = ((yuv101010Pel[1] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
+        yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
+
+        // YUV to RGB conversion, one call per pixel of the pair.
+        YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
+        YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);
+
+        // Clamp and pack the results into 32-bit pixels with alpha 0xff.
+        // nDestPitch is divided by 4 to index dstImage in uint elements (pitch presumably given in bytes).
+        const size_t dstImagePitch = nDestPitch >> 2;
+
+        dstImage[y * dstImagePitch + x     ] = RGBA_pack_10bit(red[0], green[0], blue[0], ((uint)0xff << 24));
+        dstImage[y * dstImagePitch + x + 1 ] = RGBA_pack_10bit(red[1], green[1], blue[1], ((uint)0xff << 24));
+    }
+
+    // Host launcher: runs the NV12_to_RGB kernel from decodedFrame into interopFrame on 'stream'.
+    void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
+    {
+        const dim3 threads(32, 8); // every thread covers two columns, so one block spans 64 columns
+        const dim3 blocks(divUp(interopFrame.cols, 2 * threads.x), divUp(interopFrame.rows, threads.y));
+
+        NV12_to_RGB<<<blocks, threads, 0, stream>>>(decodedFrame.data, decodedFrame.step,
+            interopFrame.data, interopFrame.step, interopFrame.cols, interopFrame.rows);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0) // default stream: wait for the kernel before returning
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+}}}
diff --git a/modules/gpucodec/src/cuda/rgb_to_yv12.cu b/modules/gpucodec/src/cuda/rgb_to_yv12.cu
new file mode 100644
index 000000000..8787b1e68
--- /dev/null
+++ b/modules/gpucodec/src/cuda/rgb_to_yv12.cu
@@ -0,0 +1,170 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    __device__ __forceinline__ void rgb_to_y(const uchar b, const uchar g, const uchar r, uchar& y)
+    {
+        y = static_cast<uchar>((30 * r + 59 * g + 11 * b) / 100); // percent-weighted luma; weights sum to 100, so 0..255
+    }
+
+    __device__ __forceinline__ void rgb_to_yuv(const uchar b, const uchar g, const uchar r, uchar& y, uchar& u, uchar& v)
+    {
+        rgb_to_y(b, g, r, y);                                              // luma
+        u = static_cast<uchar>((-17 * r - 33 * g + 50 * b + 12800) / 100); // chroma; 12800 / 100 is the +128 offset
+        v = static_cast<uchar>(( 50 * r - 42 * g -  8 * b + 12800) / 100); // chroma; same +128 offset
+    }
+
+    // Writes one 2x2 tile of a single-channel source into the planar destination: four luma
+    // samples plus one sample in each of the two half-stride chroma planes. One thread per tile.
+    __global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst)
+    {
+        // Top-left corner of this thread's tile; tiles not fully inside src are skipped.
+        const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
+        const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
+        if (x + 1 >= src.cols || y + 1 >= src.rows)
+            return;
+
+        // Plane layout inside dst: luma first, then two chroma planes with half the row stride.
+        const size_t planeSize = src.rows * dst.step;
+        PtrStepb y_plane(dst.data, dst.step);
+        PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
+        PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
+
+        uchar y_val, u_val, v_val;
+
+        // Luma for every pixel of the tile (the gray value feeds all three colour inputs).
+        for (int dy = 0; dy < 2; ++dy)
+        {
+            for (int dx = 0; dx < 2; ++dx)
+            {
+                const uchar pix = src(y + dy, x + dx);
+                rgb_to_y(pix, pix, pix, y_val);
+                y_plane(y + dy, x + dx) = y_val;
+            }
+        }
+
+        // Chroma comes from the bottom-right pixel only; the tile is not averaged.
+        const uchar corner = src(y + 1, x + 1);
+        rgb_to_yuv(corner, corner, corner, y_val, u_val, v_val);
+        u_plane(y / 2, x / 2) = u_val;
+        v_plane(y / 2, x / 2) = v_val;
+    }
+
+    // Writes one 2x2 tile of a multi-channel source into the planar destination: four luma
+    // samples plus one sample in each chroma plane. pix.x is used as red and pix.z as blue.
+    template <typename T>
+    __global__ void RGB_to_YV12(const PtrStepSz<T> src, PtrStepb dst)
+    {
+        // Top-left corner of this thread's tile; tiles not fully inside src are skipped.
+        const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
+        const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
+        if (x + 1 >= src.cols || y + 1 >= src.rows)
+            return;
+
+        // Plane layout inside dst: luma first, then two chroma planes with half the row stride.
+        const size_t planeSize = src.rows * dst.step;
+        PtrStepb y_plane(dst.data, dst.step);
+        PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
+        PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
+
+        uchar y_val, u_val, v_val;
+
+        // Luma for every pixel of the tile.
+        for (int dy = 0; dy < 2; ++dy)
+        {
+            for (int dx = 0; dx < 2; ++dx)
+            {
+                const T pix = src(y + dy, x + dx);
+                rgb_to_y(pix.z, pix.y, pix.x, y_val);
+                y_plane(y + dy, x + dx) = y_val;
+            }
+        }
+
+        // Chroma comes from the bottom-right pixel only; the tile is not averaged.
+        const T corner = src(y + 1, x + 1);
+        rgb_to_yuv(corner.z, corner.y, corner.x, y_val, u_val, v_val);
+        u_plane(y / 2, x / 2) = u_val;
+        v_plane(y / 2, x / 2) = v_val;
+    }
+
+    // Launches Gray_to_YV12 with one thread per 2x2 tile of src.
+    void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream)
+    {
+        const dim3 threads(32, 8);
+        const dim3 blocks(divUp(src.cols, threads.x * 2), divUp(src.rows, threads.y * 2));
+
+        Gray_to_YV12<<<blocks, threads, 0, stream>>>(src, dst);
+        cudaSafeCall( cudaGetLastError() );
+        if (stream == 0) // default stream: wait for the kernel before returning
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+    // Launches RGB_to_YV12 for a cn-channel 8-bit source, with one thread per 2x2 tile of src.
+    template <int cn>
+    void RGB_to_YV12_caller(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream)
+    {
+        typedef typename TypeVec<uchar, cn>::vec_type pixel_t; // uchar vector type with cn components
+        const dim3 threads(32, 8);
+        const dim3 blocks(divUp(src.cols, threads.x * 2), divUp(src.rows, threads.y * 2));
+
+        RGB_to_YV12<<<blocks, threads, 0, stream>>>(static_cast< PtrStepSz<pixel_t> >(src), dst);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0) // default stream: wait for the kernel before returning
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    // Dispatches to the launcher for cn = 1, 3 or 4 source channels; any other count raises an invalid-value error.
+    void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream)
+    {
+        typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream);
+        static const func_t funcs[] = {0, Gray_to_YV12_caller, 0, RGB_to_YV12_caller<3>, RGB_to_YV12_caller<4>};
+        const func_t func = (cn >= 1 && cn <= 4) ? funcs[cn] : 0; // never index outside the table
+        if (func == 0)
+            cudaSafeCall( cudaErrorInvalidValue ); // unsupported channel count: no launcher to call
+        else
+            func(src, dst, stream);
+    }
+}}}
diff --git a/modules/gpu/src/cuvid_video_source.cpp b/modules/gpucodec/src/cuvid_video_source.cpp
similarity index 96%
rename from modules/gpu/src/cuvid_video_source.cpp
rename to modules/gpucodec/src/cuvid_video_source.cpp
index b725a707b..73d6d2426 100644
--- a/modules/gpu/src/cuvid_video_source.cpp
+++ b/modules/gpucodec/src/cuvid_video_source.cpp
@@ -40,9 +40,9 @@
 //
 //M*/
 
-#include "cuvid_video_source.h"
+#include "precomp.hpp"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#ifdef HAVE_NVCUVID
 
 cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const String& fname)
 {
@@ -69,6 +69,11 @@ cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const String& fname)
     format_.height = vidfmt.coded_height;
 }
 
+cv::gpu::detail::CuvidVideoSource::~CuvidVideoSource()
+{
+    cuvidDestroyVideoSource(videoSource_); // destroy the video source handle held by this object; the result is ignored
+}
+
 cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::CuvidVideoSource::format() const
 {
     return format_;
@@ -101,4 +106,4 @@ int CUDAAPI cv::gpu::detail::CuvidVideoSource::HandleVideoData(void* userData, C
     return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
 }
 
-#endif // defined(HAVE_CUDA) && !defined(__APPLE__)
+#endif // HAVE_NVCUVID
diff --git a/modules/gpu/src/cuvid_video_source.h b/modules/gpucodec/src/cuvid_video_source.h
similarity index 61%
rename from modules/gpu/src/cuvid_video_source.h
rename to modules/gpucodec/src/cuvid_video_source.h
index 1bf484902..a0b78222d 100644
--- a/modules/gpu/src/cuvid_video_source.h
+++ b/modules/gpucodec/src/cuvid_video_source.h
@@ -43,48 +43,44 @@
 #ifndef __CUVUD_VIDEO_SOURCE_H__
 #define __CUVUD_VIDEO_SOURCE_H__
 
-#include "precomp.hpp"
+#include "opencv2/core/gpu_private.hpp"
+#include "opencv2/gpucodec.hpp"
+#include "thread.h"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#include <nvcuvid.h>
 
-namespace cv { namespace gpu
+namespace cv { namespace gpu { namespace detail
 {
-    namespace detail
-    {
-        class CuvidVideoSource : public VideoReader_GPU::VideoSource
-        {
-        public:
-            explicit CuvidVideoSource(const String& fname);
-            ~CuvidVideoSource() { cuvidDestroyVideoSource(videoSource_); }
 
-            VideoReader_GPU::FormatInfo format() const;
-            void start();
-            void stop();
-            bool isStarted() const;
-            bool hasError() const;
+class CuvidVideoSource : public VideoReader_GPU::VideoSource
+{
+public:
+    explicit CuvidVideoSource(const String& fname);
+    ~CuvidVideoSource();
 
-        private:
-            CuvidVideoSource(const CuvidVideoSource&);
-            CuvidVideoSource& operator =(const CuvidVideoSource&);
+    VideoReader_GPU::FormatInfo format() const;
+    void start();
+    void stop();
+    bool isStarted() const;
+    bool hasError() const;
 
-            // Callback for handling packages of demuxed video data.
-            //
-            // Parameters:
-            //      pUserData - Pointer to user data. We must pass a pointer to a
-            //          VideoSourceData struct here, that contains a valid CUvideoparser
-            //          and FrameQueue.
-            //      pPacket - video-source data packet.
-            //
-            // NOTE: called from a different thread that doesn't not have a cuda context
-            //
-            static int CUDAAPI HandleVideoData(void* pUserData, CUVIDSOURCEDATAPACKET* pPacket);
+private:
+    // Callback for handling packages of demuxed video data.
+    //
+    // Parameters:
+    //      pUserData - Pointer to user data. We must pass a pointer to a
+    //          VideoSourceData struct here, that contains a valid CUvideoparser
+    //          and FrameQueue.
+    //      pPacket - video-source data packet.
+    //
+    // NOTE: called from a different thread that does not have a CUDA context
+    //
+    static int CUDAAPI HandleVideoData(void* pUserData, CUVIDSOURCEDATAPACKET* pPacket);
 
-            CUvideosource videoSource_;
-            VideoReader_GPU::FormatInfo format_;
-        };
-    }
-}}
+    CUvideosource videoSource_;
+    VideoReader_GPU::FormatInfo format_;
+};
 
-#endif // defined(HAVE_CUDA) && !defined(__APPLE__)
+}}}
 
 #endif // __CUVUD_VIDEO_SOURCE_H__
diff --git a/modules/gpu/src/ffmpeg_video_source.cpp b/modules/gpucodec/src/ffmpeg_video_source.cpp
similarity index 94%
rename from modules/gpu/src/ffmpeg_video_source.cpp
rename to modules/gpucodec/src/ffmpeg_video_source.cpp
index 16cd7b64e..6ba09284d 100644
--- a/modules/gpu/src/ffmpeg_video_source.cpp
+++ b/modules/gpucodec/src/ffmpeg_video_source.cpp
@@ -40,14 +40,12 @@
 //
 //M*/
 
-#include "ffmpeg_video_source.h"
+#include "precomp.hpp"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#ifdef HAVE_NVCUVID
 
-#if defined(HAVE_FFMPEG) && defined(BUILD_SHARED_LIBS)
+#if defined(HAVE_FFMPEG) && defined(BUILD_SHARED_LIBS) && !defined(WIN32)
     #include "../src/cap_ffmpeg_impl.hpp"
-#else
-    #include "../src/cap_ffmpeg_api.hpp"
 #endif
 
 namespace
@@ -116,11 +114,6 @@ cv::gpu::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname) :
     format_.height = height;
 }
 
-cv::gpu::detail::FFmpegVideoSource::~FFmpegVideoSource()
-{
-    release_InputMediaStream_FFMPEG_p(stream_);
-}
-
 cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::FFmpegVideoSource::format() const
 {
     return format_;
@@ -130,14 +123,14 @@ void cv::gpu::detail::FFmpegVideoSource::start()
 {
     stop_ = false;
     hasError_ = false;
-    thread_.reset(new Thread(readLoop, this));
+    thread_ = new Thread(readLoop, this);
 }
 
 void cv::gpu::detail::FFmpegVideoSource::stop()
 {
     stop_ = true;
     thread_->wait();
-    thread_.reset();
+    thread_.release();
 }
 
 bool cv::gpu::detail::FFmpegVideoSource::isStarted() const
@@ -179,4 +172,9 @@ void cv::gpu::detail::FFmpegVideoSource::readLoop(void* userData)
     thiz->parseVideoData(0, 0, true);
 }
 
+template <> void cv::Ptr<InputMediaStream_FFMPEG>::delete_obj()
+{
+    if (obj) release_InputMediaStream_FFMPEG_p(obj); // free the stream through the FFmpeg release function; no-op for a null pointer
+}
+
 #endif // HAVE_CUDA
diff --git a/modules/gpu/src/ffmpeg_video_source.h b/modules/gpucodec/src/ffmpeg_video_source.h
similarity index 69%
rename from modules/gpu/src/ffmpeg_video_source.h
rename to modules/gpucodec/src/ffmpeg_video_source.h
index a2ba40ccc..d097785d7 100644
--- a/modules/gpu/src/ffmpeg_video_source.h
+++ b/modules/gpucodec/src/ffmpeg_video_source.h
@@ -43,46 +43,40 @@
 #ifndef __FFMPEG_VIDEO_SOURCE_H__
 #define __FFMPEG_VIDEO_SOURCE_H__
 
-#include "precomp.hpp"
-#include "thread_wrappers.h"
-
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#include "opencv2/gpucodec.hpp"
+#include "thread.h"
 
 struct InputMediaStream_FFMPEG;
 
-namespace cv { namespace gpu
+namespace cv { namespace gpu { namespace detail {
+
+class FFmpegVideoSource : public VideoReader_GPU::VideoSource
 {
-    namespace detail
-    {
-        class FFmpegVideoSource : public VideoReader_GPU::VideoSource
-        {
-        public:
-            FFmpegVideoSource(const String& fname);
-            ~FFmpegVideoSource();
+public:
+    FFmpegVideoSource(const String& fname);
 
-            VideoReader_GPU::FormatInfo format() const;
-            void start();
-            void stop();
-            bool isStarted() const;
-            bool hasError() const;
+    VideoReader_GPU::FormatInfo format() const;
+    void start();
+    void stop();
+    bool isStarted() const;
+    bool hasError() const;
 
-        private:
-            FFmpegVideoSource(const FFmpegVideoSource&);
-            FFmpegVideoSource& operator =(const FFmpegVideoSource&);
+private:
+    VideoReader_GPU::FormatInfo format_;
 
-            VideoReader_GPU::FormatInfo format_;
+    cv::Ptr<InputMediaStream_FFMPEG> stream_;
 
-            InputMediaStream_FFMPEG* stream_;
+    cv::Ptr<Thread> thread_;
+    volatile bool stop_;
+    volatile bool hasError_;
 
-            std::auto_ptr<Thread> thread_;
-            volatile bool stop_;
-            volatile bool hasError_;
+    static void readLoop(void* userData);
+};
 
-            static void readLoop(void* userData);
-        };
-    }
-}}
+}}}
 
-#endif // HAVE_CUDA
+namespace cv {
+    template <> void Ptr<InputMediaStream_FFMPEG>::delete_obj();
+}
 
-#endif // __CUVUD_VIDEO_SOURCE_H__
+#endif // __FFMPEG_VIDEO_SOURCE_H__
diff --git a/modules/gpu/src/frame_queue.cpp b/modules/gpucodec/src/frame_queue.cpp
similarity index 94%
rename from modules/gpu/src/frame_queue.cpp
rename to modules/gpucodec/src/frame_queue.cpp
index a8b9cff0b..2c5045500 100644
--- a/modules/gpu/src/frame_queue.cpp
+++ b/modules/gpucodec/src/frame_queue.cpp
@@ -40,9 +40,9 @@
 //
 //M*/
 
-#include "frame_queue.h"
+#include "precomp.hpp"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#ifdef HAVE_NVCUVID
 
 cv::gpu::detail::FrameQueue::FrameQueue() :
     endOfDecode_(0),
@@ -79,7 +79,7 @@ void cv::gpu::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams)
         bool isFramePlaced = false;
 
         {
-            CriticalSection::AutoLock autoLock(criticalSection_);
+            AutoLock autoLock(mtx_);
 
             if (framesInQueue_ < MaximumSize)
             {
@@ -100,7 +100,7 @@ void cv::gpu::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams)
 
 bool cv::gpu::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo)
 {
-    CriticalSection::AutoLock autoLock(criticalSection_);
+    AutoLock autoLock(mtx_);
 
     if (framesInQueue_ > 0)
     {
@@ -114,4 +114,4 @@ bool cv::gpu::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo)
     return false;
 }
 
-#endif // HAVE_CUDA
+#endif // HAVE_NVCUVID
diff --git a/modules/gpu/src/frame_queue.h b/modules/gpucodec/src/frame_queue.h
similarity index 55%
rename from modules/gpu/src/frame_queue.h
rename to modules/gpucodec/src/frame_queue.h
index e408b0dd0..51e3bcedb 100644
--- a/modules/gpu/src/frame_queue.h
+++ b/modules/gpucodec/src/frame_queue.h
@@ -43,61 +43,55 @@
 #ifndef __FRAME_QUEUE_H__
 #define __FRAME_QUEUE_H__
 
-#include "precomp.hpp"
-#include "thread_wrappers.h"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/core/gpu_private.hpp"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#include <nvcuvid.h>
 
-namespace cv { namespace gpu
+namespace cv { namespace gpu { namespace detail
 {
-    namespace detail
-    {
-        class FrameQueue
-        {
-        public:
-            static const int MaximumSize = 20; // MAX_FRM_CNT;
 
-            FrameQueue();
+class FrameQueue
+{
+public:
+    static const int MaximumSize = 20; // MAX_FRM_CNT;
 
-            void endDecode() { endOfDecode_ = true; }
-            bool isEndOfDecode() const { return endOfDecode_ != 0;}
+    FrameQueue();
 
-            // Spins until frame becomes available or decoding gets canceled.
-            // If the requested frame is available the method returns true.
-            // If decoding was interupted before the requested frame becomes
-            // available, the method returns false.
-            bool waitUntilFrameAvailable(int pictureIndex);
+    void endDecode() { endOfDecode_ = true; }
+    bool isEndOfDecode() const { return endOfDecode_ != 0;}
 
-            void enqueue(const CUVIDPARSERDISPINFO* picParams);
+    // Spins until frame becomes available or decoding gets canceled.
+    // If the requested frame is available the method returns true.
+    // If decoding was interrupted before the requested frame becomes
+    // available, the method returns false.
+    bool waitUntilFrameAvailable(int pictureIndex);
 
-            // Deque the next frame.
-            // Parameters:
-            //      displayInfo - New frame info gets placed into this object.
-            // Returns:
-            //      true, if a new frame was returned,
-            //      false, if the queue was empty and no new frame could be returned.
-            bool dequeue(CUVIDPARSERDISPINFO& displayInfo);
+    void enqueue(const CUVIDPARSERDISPINFO* picParams);
 
-            void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = false; }
+    // Dequeue the next frame.
+    // Parameters:
+    //      displayInfo - New frame info gets placed into this object.
+    // Returns:
+    //      true, if a new frame was returned,
+    //      false, if the queue was empty and no new frame could be returned.
+    bool dequeue(CUVIDPARSERDISPINFO& displayInfo);
 
-        private:
-            FrameQueue(const FrameQueue&);
-            FrameQueue& operator =(const FrameQueue&);
+    void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = false; }
 
-            bool isInUse(int pictureIndex) const { return isFrameInUse_[pictureIndex] != 0; }
+private:
+    bool isInUse(int pictureIndex) const { return isFrameInUse_[pictureIndex] != 0; }
 
-            CriticalSection criticalSection_;
+    Mutex mtx_;
 
-            volatile int isFrameInUse_[MaximumSize];
-            volatile int endOfDecode_;
+    volatile int isFrameInUse_[MaximumSize];
+    volatile int endOfDecode_;
 
-            int framesInQueue_;
-            int readPosition_;
-            CUVIDPARSERDISPINFO displayQueue_[MaximumSize];
-        };
-    }
-}}
+    int framesInQueue_;
+    int readPosition_;
+    CUVIDPARSERDISPINFO displayQueue_[MaximumSize];
+};
 
-#endif // HAVE_CUDA
+}}}
 
 #endif // __FRAME_QUEUE_H__
diff --git a/modules/gpucodec/src/precomp.cpp b/modules/gpucodec/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpucodec/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpucodec/src/precomp.hpp b/modules/gpucodec/src/precomp.hpp
new file mode 100644
index 000000000..9db176e66
--- /dev/null
+++ b/modules/gpucodec/src/precomp.hpp
@@ -0,0 +1,79 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <cstdlib>
+#include <cstring>
+#include <deque>
+#include <utility>
+#include <stdexcept>
+#include <iostream>
+
+#include "opencv2/gpucodec.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#ifdef HAVE_NVCUVID
+    #include <nvcuvid.h>
+
+    #ifdef WIN32
+        #define NOMINMAX
+        #include <windows.h>
+        #include <NVEncoderAPI.h>
+    #else
+        #include <pthread.h>
+        #include <unistd.h>
+    #endif
+
+    #include "thread.h"
+    #include "ffmpeg_video_source.h"
+    #include "cuvid_video_source.h"
+    #include "frame_queue.h"
+    #include "video_decoder.h"
+    #include "video_parser.h"
+
+    #include "../src/cap_ffmpeg_api.hpp"
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpucodec/src/thread.cpp b/modules/gpucodec/src/thread.cpp
new file mode 100644
index 000000000..db9f3de39
--- /dev/null
+++ b/modules/gpucodec/src/thread.cpp
@@ -0,0 +1,174 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_NVCUVID
+
+using namespace cv::gpu::detail;
+
+#ifdef WIN32
+
+namespace
+{
+    struct UserData // thread entry point bundled with the argument to pass to it
+    {
+        Thread::Func func;
+        void* param;
+    };
+
+    DWORD WINAPI WinThreadFunction(LPVOID lpParam) // trampoline with the signature CreateThread expects
+    {
+        UserData* userData = static_cast<UserData*>(lpParam); // lpParam is the UserData* handed to CreateThread
+
+        userData->func(userData->param);
+
+        return 0; // the thread exit code is always 0
+    }
+}
+
+class cv::gpu::detail::Thread::Impl
+{
+public:
+    // Starts func(userData) on a new Win32 thread; thread_ is NULL if creation fails.
+    Impl(Thread::Func func, void* userData)
+    {
+        userData_.func = func;
+        userData_.param = userData;
+
+        thread_ = CreateThread(
+            NULL,                   // default security attributes
+            0,                      // use default stack size
+            WinThreadFunction,      // thread function name
+            &userData_,             // argument to thread function
+            0,                      // use default creation flags
+            &threadId_);            // returns the thread identifier
+    }
+
+    // Releases the handle only; closing it does not stop a thread that is still running.
+    ~Impl() { if (thread_) CloseHandle(thread_); }
+
+    // Blocks until the thread exits; returns immediately if it was never created.
+    void wait()
+    {
+        if (thread_) WaitForSingleObject(thread_, INFINITE);
+    }
+
+private:
+    UserData userData_;
+    HANDLE thread_;
+    DWORD threadId_;
+};
+
+#else
+
+namespace
+{
+    struct UserData // thread entry point bundled with the argument to pass to it
+    {
+        Thread::Func func;
+        void* param;
+    };
+
+    void* PThreadFunction(void* lpParam) // trampoline with the signature pthread_create expects
+    {
+        UserData* userData = static_cast<UserData*>(lpParam); // lpParam is the UserData* handed to pthread_create
+
+        userData->func(userData->param);
+
+        return 0; // the thread result is always a null pointer
+    }
+}
+
+class cv::gpu::detail::Thread::Impl
+{
+public:
+    // Starts func(userData) on a new POSIX thread; joinable_ stays false if creation fails.
+    Impl(Thread::Func func, void* userData) : joinable_(false)
+    {
+        userData_.func = func;
+        userData_.param = userData;
+        joinable_ = pthread_create(&thread_, NULL, PThreadFunction, &userData_) == 0;
+    }
+
+    // Detach only a thread that was never joined; detaching a joined thread is undefined behaviour.
+    ~Impl() { if (joinable_) pthread_detach(thread_); }
+
+    // Blocks until the thread finishes; no-op if it was never started or was already joined.
+    void wait()
+    {
+        if (joinable_ && pthread_join(thread_, NULL) == 0) joinable_ = false;
+    }
+
+private:
+    pthread_t thread_;
+    UserData userData_;
+    bool joinable_;
+};
+
+#endif
+
+cv::gpu::detail::Thread::Thread(Func func, void* userData) : // the thread is launched by the Impl constructor
+    impl_(new Impl(func, userData))
+{
+}
+
+void cv::gpu::detail::Thread::wait() // blocks the caller until the wrapped thread has finished
+{
+    impl_->wait(); // forwards to the platform-specific Impl
+}
+
+void cv::gpu::detail::Thread::sleep(int ms) // suspends the calling thread for ms milliseconds
+{
+#ifdef WIN32
+    ::Sleep(ms);
+#else
+    ::usleep(ms * 1000); // NOTE(review): POSIX lets usleep() reject values >= 1000000 us; confirm callers pass ms < 1000
+#endif
+}
+
+template <> void cv::Ptr<cv::gpu::detail::Thread::Impl>::delete_obj() // defined here, where Impl is a complete type
+{
+    if (obj) delete obj; // runs the platform-specific Impl destructor
+}
+
+#endif // HAVE_NVCUVID
diff --git a/modules/gpu/src/thread_wrappers.h b/modules/gpucodec/src/thread.h
similarity index 61%
rename from modules/gpu/src/thread_wrappers.h
rename to modules/gpucodec/src/thread.h
index da811737d..1489f5830 100644
--- a/modules/gpu/src/thread_wrappers.h
+++ b/modules/gpucodec/src/thread.h
@@ -43,70 +43,31 @@
 #ifndef __THREAD_WRAPPERS_H__
 #define __THREAD_WRAPPERS_H__
 
-#include "precomp.hpp"
+#include "opencv2/core.hpp"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+namespace cv { namespace gpu { namespace detail {
 
-namespace cv { namespace gpu
+class Thread
 {
-    namespace detail
-    {
-        class CriticalSection
-        {
-        public:
-            CriticalSection();
-            ~CriticalSection();
+public:
+    typedef void (*Func)(void* userData);
 
-            void enter();
-            void leave();
+    explicit Thread(Func func, void* userData = 0);
 
-            class AutoLock
-            {
-            public:
-                explicit AutoLock(CriticalSection& criticalSection) :
-                    criticalSection_(criticalSection)
-                {
-                    criticalSection_.enter();
-                }
+    void wait();
 
-                ~AutoLock()
-                {
-                    criticalSection_.leave();
-                }
+    static void sleep(int ms);
 
-            private:
-                CriticalSection& criticalSection_;
-            };
+    class Impl;
 
-        private:
-            CriticalSection(const CriticalSection&);
-            CriticalSection& operator=(const CriticalSection&);
+private:
+    cv::Ptr<Impl> impl_;
+};
 
-            class Impl;
-            std::auto_ptr<Impl> impl_;
-        };
+}}}
 
-        class Thread
-        {
-        public:
-            explicit Thread(void (*func)(void* userData), void* userData = 0);
-            ~Thread();
-
-            void wait();
-
-            static void sleep(int ms);
-
-        private:
-            Thread(const Thread&);
-            Thread& operator=(const Thread&);
-
-            class Impl;
-            std::auto_ptr<Impl> impl_;
-        };
-
-    }
-}}
-
-#endif // HAVE_CUDA
+namespace cv {
+    template <> void Ptr<cv::gpu::detail::Thread::Impl>::delete_obj();
+}
 
 #endif // __THREAD_WRAPPERS_H__
diff --git a/modules/gpu/src/video_decoder.cpp b/modules/gpucodec/src/video_decoder.cpp
similarity index 97%
rename from modules/gpu/src/video_decoder.cpp
rename to modules/gpucodec/src/video_decoder.cpp
index fe897895d..7e28e872b 100644
--- a/modules/gpu/src/video_decoder.cpp
+++ b/modules/gpucodec/src/video_decoder.cpp
@@ -40,10 +40,9 @@
 //
 //M*/
 
-#include "video_decoder.h"
-#include "frame_queue.h"
+#include "precomp.hpp"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#ifdef HAVE_NVCUVID
 
 void cv::gpu::detail::VideoDecoder::create(const VideoReader_GPU::FormatInfo& videoFormat)
 {
@@ -113,4 +112,4 @@ void cv::gpu::detail::VideoDecoder::release()
     }
 }
 
-#endif // HAVE_CUDA
+#endif // HAVE_NVCUVID
diff --git a/modules/gpucodec/src/video_decoder.h b/modules/gpucodec/src/video_decoder.h
new file mode 100644
index 000000000..e31ec1a0d
--- /dev/null
+++ b/modules/gpucodec/src/video_decoder.h
@@ -0,0 +1,111 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __VIDEO_DECODER_H__
+#define __VIDEO_DECODER_H__
+
+#include "opencv2/core/gpu_private.hpp"
+#include "opencv2/gpucodec.hpp"
+
+#include <nvcuvid.h>
+
+namespace cv { namespace gpu { namespace detail
+{
+
+class VideoDecoder // owns a CUvideodecoder handle together with the CUVIDDECODECREATEINFO stored for it
+{
+public:
+    VideoDecoder(const VideoReader_GPU::FormatInfo& videoFormat, CUvideoctxlock lock) : lock_(lock), decoder_(0)
+    {
+        create(videoFormat); // defined out of line (video_decoder.cpp)
+    }
+
+    ~VideoDecoder()
+    {
+        release(); // defined out of line (video_decoder.cpp)
+    }
+
+    void create(const VideoReader_GPU::FormatInfo& videoFormat);
+    void release();
+
+    // Get the codec type currently used.
+    cudaVideoCodec codec() const { return createInfo_.CodecType; }
+    unsigned long maxDecodeSurfaces() const { return createInfo_.ulNumDecodeSurfaces; }
+
+    unsigned long frameWidth() const { return createInfo_.ulWidth; }   // width stored in createInfo_
+    unsigned long frameHeight() const { return createInfo_.ulHeight; } // height stored in createInfo_
+
+    unsigned long targetWidth() const { return createInfo_.ulTargetWidth; }   // output width stored in createInfo_
+    unsigned long targetHeight() const { return createInfo_.ulTargetHeight; } // output height stored in createInfo_
+
+    cudaVideoChromaFormat chromaFormat() const { return createInfo_.ChromaFormat; }
+
+    bool decodePicture(CUVIDPICPARAMS* picParams) // returns true when cuvidDecodePicture reports CUDA_SUCCESS
+    {
+        return cuvidDecodePicture(decoder_, picParams) == CUDA_SUCCESS;
+    }
+
+    cv::gpu::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams) // maps picture picIdx and returns it as a GpuMat
+    {
+        CUdeviceptr ptr;
+        unsigned int pitch;
+
+        cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) );
+
+        return GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch); // 3/2 height: presumably NV12 (luma + half-height chroma) - TODO confirm
+    }
+
+    void unmapFrame(cv::gpu::GpuMat& frame) // unmaps a frame obtained from mapFrame() and clears the GpuMat
+    {
+        cuSafeCall( cuvidUnmapVideoFrame(decoder_, (CUdeviceptr) frame.data) );
+        frame.release();
+    }
+
+private:
+    CUvideoctxlock lock_;              // stored by the constructor; not used by the inline members of this header
+    CUVIDDECODECREATEINFO createInfo_; // read by the accessors above
+    CUvideodecoder        decoder_;    // initialised to 0 by the constructor before create() runs
+};
+
+}}}
+
+#endif // __VIDEO_DECODER_H__
diff --git a/modules/gpu/src/video_parser.cpp b/modules/gpucodec/src/video_parser.cpp
similarity index 98%
rename from modules/gpu/src/video_parser.cpp
rename to modules/gpucodec/src/video_parser.cpp
index ab96d12b9..620f85fe8 100644
--- a/modules/gpu/src/video_parser.cpp
+++ b/modules/gpucodec/src/video_parser.cpp
@@ -40,9 +40,9 @@
 //
 //M*/
 
-#include "video_parser.h"
+#include "precomp.hpp"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#ifdef HAVE_NVCUVID
 
 cv::gpu::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue) :
     videoDecoder_(videoDecoder), frameQueue_(frameQueue), unparsedPackets_(0), hasError_(false)
@@ -158,4 +158,4 @@ int CUDAAPI cv::gpu::detail::VideoParser::HandlePictureDisplay(void* userData, C
     return true;
 }
 
-#endif // HAVE_CUDA
+#endif // HAVE_NVCUVID
diff --git a/modules/gpu/src/video_parser.h b/modules/gpucodec/src/video_parser.h
similarity index 54%
rename from modules/gpu/src/video_parser.h
rename to modules/gpucodec/src/video_parser.h
index 15700664b..a26dd3eb7 100644
--- a/modules/gpu/src/video_parser.h
+++ b/modules/gpucodec/src/video_parser.h
@@ -43,58 +43,52 @@
 #ifndef __VIDEO_PARSER_H__
 #define __VIDEO_PARSER_H__
 
-#include "precomp.hpp"
-
+#include "opencv2/core/gpu_private.hpp"
+#include "opencv2/gpucodec.hpp"
 #include "frame_queue.h"
 #include "video_decoder.h"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#include <nvcuvid.h>
 
-namespace cv { namespace gpu
+namespace cv { namespace gpu { namespace detail
 {
-    namespace detail
+
+class VideoParser
+{
+public:
+    VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue);
+
+    ~VideoParser()
     {
-        class VideoParser
-        {
-        public:
-            VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue);
-
-            ~VideoParser()
-            {
-                cuvidDestroyVideoParser(parser_);
-            }
-
-            bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream);
-
-            bool hasError() const { return hasError_; }
-
-        private:
-            VideoParser(const VideoParser&);
-            VideoParser& operator =(const VideoParser&);
-
-            VideoDecoder* videoDecoder_;
-            FrameQueue* frameQueue_;
-            CUvideoparser parser_;
-            int unparsedPackets_;
-            volatile bool hasError_;
-
-            // Called when the decoder encounters a video format change (or initial sequence header)
-            // This particular implementation of the callback returns 0 in case the video format changes
-            // to something different than the original format. Returning 0 causes a stop of the app.
-            static int CUDAAPI HandleVideoSequence(void* pUserData, CUVIDEOFORMAT* pFormat);
-
-            // Called by the video parser to decode a single picture
-            // Since the parser will deliver data as fast as it can, we need to make sure that the picture
-            // index we're attempting to use for decode is no longer used for display
-            static int CUDAAPI HandlePictureDecode(void* pUserData, CUVIDPICPARAMS* pPicParams);
-
-            // Called by the video parser to display a video frame (in the case of field pictures, there may be
-            // 2 decode calls per 1 display call, since two fields make up one frame)
-            static int CUDAAPI HandlePictureDisplay(void* pUserData, CUVIDPARSERDISPINFO* pPicParams);
-        };
+        cuvidDestroyVideoParser(parser_);
     }
-}}
 
-#endif // HAVE_CUDA
+    bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream);
+
+    bool hasError() const { return hasError_; }
+
+private:
+    VideoDecoder* videoDecoder_;
+    FrameQueue* frameQueue_;
+    CUvideoparser parser_;
+    int unparsedPackets_;
+    volatile bool hasError_;
+
+    // Called when the decoder encounters a video format change (or initial sequence header)
+    // This particular implementation of the callback returns 0 in case the video format changes
+    // to something different than the original format. Returning 0 causes a stop of the app.
+    static int CUDAAPI HandleVideoSequence(void* pUserData, CUVIDEOFORMAT* pFormat);
+
+    // Called by the video parser to decode a single picture
+    // Since the parser will deliver data as fast as it can, we need to make sure that the picture
+    // index we're attempting to use for decode is no longer used for display
+    static int CUDAAPI HandlePictureDecode(void* pUserData, CUVIDPICPARAMS* pPicParams);
+
+    // Called by the video parser to display a video frame (in the case of field pictures, there may be
+    // 2 decode calls per 1 display call, since two fields make up one frame)
+    static int CUDAAPI HandlePictureDisplay(void* pUserData, CUVIDPARSERDISPINFO* pPicParams);
+};
+
+}}}
 
 #endif // __VIDEO_PARSER_H__
diff --git a/modules/gpu/src/video_reader.cpp b/modules/gpucodec/src/video_reader.cpp
similarity index 89%
rename from modules/gpu/src/video_reader.cpp
rename to modules/gpucodec/src/video_reader.cpp
index 7bc63dae3..dbb4bbcf2 100644
--- a/modules/gpu/src/video_reader.cpp
+++ b/modules/gpucodec/src/video_reader.cpp
@@ -42,7 +42,7 @@
 
 #include "precomp.hpp"
 
-#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER) || !defined(HAVE_NVCUVID)
+#ifndef HAVE_NVCUVID
 
 class cv::gpu::VideoReader_GPU::Impl
 {
@@ -61,14 +61,7 @@ cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::VideoReader_GPU::format() const {
 bool cv::gpu::VideoReader_GPU::VideoSource::parseVideoData(const unsigned char*, size_t, bool) { throw_no_cuda(); return false; }
 void cv::gpu::VideoReader_GPU::dumpFormat(std::ostream&) { throw_no_cuda(); }
 
-#else // HAVE_CUDA
-
-#include "frame_queue.h"
-#include "video_decoder.h"
-#include "video_parser.h"
-
-#include "cuvid_video_source.h"
-#include "ffmpeg_video_source.h"
+#else // HAVE_NVCUVID
 
 class cv::gpu::VideoReader_GPU::Impl
 {
@@ -81,14 +74,11 @@ public:
     cv::gpu::VideoReader_GPU::FormatInfo format() const { return videoSource_->format(); }
 
 private:
-    Impl(const Impl&);
-    Impl& operator =(const Impl&);
-
     cv::Ptr<cv::gpu::VideoReader_GPU::VideoSource> videoSource_;
 
-    std::auto_ptr<cv::gpu::detail::FrameQueue> frameQueue_;
-    std::auto_ptr<cv::gpu::detail::VideoDecoder> videoDecoder_;
-    std::auto_ptr<cv::gpu::detail::VideoParser> videoParser_;
+    cv::Ptr<cv::gpu::detail::FrameQueue> frameQueue_;
+    cv::Ptr<cv::gpu::detail::VideoDecoder> videoDecoder_;
+    cv::Ptr<cv::gpu::detail::VideoParser> videoParser_;
 
     CUvideoctxlock lock_;
 
@@ -110,12 +100,12 @@ cv::gpu::VideoReader_GPU::Impl::Impl(const cv::Ptr<VideoSource>& source) :
     cuSafeCall( cuCtxGetCurrent(&ctx) );
     cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );
 
-    frameQueue_.reset(new detail::FrameQueue);
-    videoDecoder_.reset(new detail::VideoDecoder(videoSource_->format(), lock_));
-    videoParser_.reset(new detail::VideoParser(videoDecoder_.get(), frameQueue_.get()));
+    frameQueue_ = new detail::FrameQueue;
+    videoDecoder_ = new detail::VideoDecoder(videoSource_->format(), lock_);
+    videoParser_ = new detail::VideoParser(videoDecoder_, frameQueue_);
 
-    videoSource_->setFrameQueue(frameQueue_.get());
-    videoSource_->setVideoParser(videoParser_.get());
+    videoSource_->setFrameQueue(frameQueue_);
+    videoSource_->setVideoParser(videoParser_);
 
     videoSource_->start();
 }
@@ -126,12 +116,10 @@ cv::gpu::VideoReader_GPU::Impl::~Impl()
     videoSource_->stop();
 }
 
-namespace cv { namespace gpu { namespace cudev {
-    namespace video_decoding
-    {
-        void loadHueCSC(float hueCSC[9]);
-        void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<unsigned int> interopFrame, cudaStream_t stream = 0);
-    }
+namespace cv { namespace gpu { namespace cudev
+{
+    void loadHueCSC(float hueCSC[9]);
+    void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream = 0);
 }}}
 
 namespace
@@ -187,7 +175,7 @@ namespace
 
     void cudaPostProcessFrame(const cv::gpu::GpuMat& decodedFrame, cv::gpu::GpuMat& interopFrame, int width, int height)
     {
-        using namespace cv::gpu::cudev::video_decoding;
+        using namespace cv::gpu::cudev;
 
         static bool updateCSC = true;
         static float hueColorSpaceMat[9];
@@ -210,7 +198,7 @@ namespace
 
         loadHueCSC(hueColorSpaceMat);
 
-        NV12ToARGB_gpu(decodedFrame, interopFrame);
+        NV12_to_RGB(decodedFrame, interopFrame);
     }
 }
 
@@ -329,17 +317,17 @@ void cv::gpu::VideoReader_GPU::open(const cv::Ptr<VideoSource>& source)
 {
     CV_Assert( !source.empty() );
     close();
-    impl_.reset(new Impl(source));
+    impl_ = new Impl(source);
 }
 
 bool cv::gpu::VideoReader_GPU::isOpened() const
 {
-    return impl_.get() != 0;
+    return !impl_.empty();
 }
 
 void cv::gpu::VideoReader_GPU::close()
 {
-    impl_.reset();
+    impl_.release();
 }
 
 bool cv::gpu::VideoReader_GPU::read(GpuMat& image)
@@ -396,4 +384,9 @@ void cv::gpu::VideoReader_GPU::dumpFormat(std::ostream& st)
     st << "Chroma Format : " << chromas[_format.chromaFormat] << std::endl;
 }
 
-#endif // HAVE_CUDA
+#endif // HAVE_NVCUVID
+
+template <> void cv::Ptr<cv::gpu::VideoReader_GPU::Impl>::delete_obj() // placed after the #endif, so it is compiled with and without HAVE_NVCUVID
+{
+    if (obj) delete obj; // Impl is a complete type at this point in either configuration
+}
diff --git a/modules/gpu/src/video_writer.cpp b/modules/gpucodec/src/video_writer.cpp
similarity index 97%
rename from modules/gpu/src/video_writer.cpp
rename to modules/gpucodec/src/video_writer.cpp
index 987be9727..94100c0b8 100644
--- a/modules/gpu/src/video_writer.cpp
+++ b/modules/gpucodec/src/video_writer.cpp
@@ -42,7 +42,7 @@
 
 #include "precomp.hpp"
 
-#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER) || !defined(HAVE_NVCUVID) || !defined(WIN32)
+#if !defined(HAVE_NVCUVID) || !defined(WIN32)
 
 class cv::gpu::VideoWriter_GPU::Impl
 {
@@ -70,13 +70,6 @@ void cv::gpu::VideoWriter_GPU::EncoderParams::save(const String&) const { throw_
 
 #else // !defined HAVE_CUDA || !defined WIN32
 
-#ifdef HAVE_FFMPEG
-    #include "../src/cap_ffmpeg_impl.hpp"
-#else
-    #include "../src/cap_ffmpeg_api.hpp"
-#endif
-
-
 ///////////////////////////////////////////////////////////////////////////
 // VideoWriter_GPU::Impl
 
@@ -91,7 +84,7 @@ namespace
 
             err = NVGetHWEncodeCaps();
             if (err)
-                CV_Error(CV_GpuNotSupported, "No CUDA capability present");
+                CV_Error(cv::Error::GpuNotSupported, "No CUDA capability present");
 
             // Create the Encoder API Interface
             err = NVCreateEncoder(&encoder_);
@@ -212,7 +205,7 @@ void cv::gpu::VideoWriter_GPU::Impl::initEncoder(double fps)
     };
     err = NVSetCodec(encoder_, codecs_id[codec_]);
     if (err)
-        CV_Error(CV_StsNotImplemented, "Codec format is not supported");
+        CV_Error(cv::Error::StsNotImplemented, "Codec format is not supported");
 
     // Set default params
 
@@ -501,14 +494,6 @@ void cv::gpu::VideoWriter_GPU::Impl::createHWEncoder()
     CV_Assert( err == 0 );
 }
 
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace video_encoding
-    {
-        void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst);
-    }
-}}}
-
 namespace
 {
     // UYVY/YUY2 are both 4:2:2 formats (16bpc)
@@ -644,6 +629,11 @@ namespace
     }
 }
 
+namespace cv { namespace gpu { namespace cudev
+{
+    void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream = 0); // forward declaration; Impl::write() calls it for SF_BGR input using the default stream; presumably defined in cuda/rgb_to_yv12.cu (TODO confirm)
+}}}
+
 void cv::gpu::VideoWriter_GPU::Impl::write(const cv::gpu::GpuMat& frame, bool lastFrame)
 {
     if (inputFormat_ == SF_BGR)
@@ -674,7 +664,7 @@ void cv::gpu::VideoWriter_GPU::Impl::write(const cv::gpu::GpuMat& frame, bool la
     CV_Assert( res == CUDA_SUCCESS );
 
     if (inputFormat_ == SF_BGR)
-        cv::gpu::cudev::video_encoding::YV12_gpu(frame, frame.channels(), videoFrame_);
+        cv::gpu::cudev::RGB_to_YV12(frame, frame.channels(), videoFrame_);
     else
     {
         switch (surfaceFormat_)
@@ -829,11 +819,14 @@ void EncoderCallBackFFMPEG::releaseBitStream(unsigned char* data, int size)
 
 void EncoderCallBackFFMPEG::onBeginFrame(int frameNumber, PicType picType)
 {
+    (void) frameNumber; // not used by this callback; the cast marks the parameter as intentionally ignored
     isKeyFrame_ = picType == IFRAME;
 }
 
 void EncoderCallBackFFMPEG::onEndFrame(int frameNumber, PicType picType)
 {
+    (void) frameNumber; // callback has nothing to do at end of frame; both parameters are intentionally ignored
+    (void) picType; // see above: the cast only marks the parameter as unused
 }
 
 ///////////////////////////////////////////////////////////////////////////
@@ -885,23 +878,23 @@ void cv::gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize,
 void cv::gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format)
 {
     close();
-    impl_.reset(new Impl(encoderCallback, frameSize, fps, format));
+    impl_ = new Impl(encoderCallback, frameSize, fps, format);
 }
 
 void cv::gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format)
 {
     close();
-    impl_.reset(new Impl(encoderCallback, frameSize, fps, params, format));
+    impl_ = new Impl(encoderCallback, frameSize, fps, params, format);
 }
 
 bool cv::gpu::VideoWriter_GPU::isOpened() const
 {
-    return impl_.get() != 0;
+    return !impl_.empty();
 }
 
 void cv::gpu::VideoWriter_GPU::close()
 {
-    impl_.reset();
+    impl_.release();
 }
 
 void cv::gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame)
@@ -1002,3 +995,8 @@ void cv::gpu::VideoWriter_GPU::EncoderParams::save(const String& configFile) con
 }
 
 #endif // !defined HAVE_CUDA || !defined WIN32
+
+template <> void cv::Ptr<cv::gpu::VideoWriter_GPU::Impl>::delete_obj() // Ptr deleter for the pimpl type; placed after the closing #endif, so it is compiled for both the stub and the real Impl
+{
+    delete obj; // delete on a null pointer is a no-op, so no explicit guard is needed
+}
diff --git a/modules/gpucodec/test/test_main.cpp b/modules/gpucodec/test/test_main.cpp
new file mode 100644
index 000000000..958adfee5
--- /dev/null
+++ b/modules/gpucodec/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_TEST_MAIN("gpu")
diff --git a/modules/gpucodec/test/test_precomp.cpp b/modules/gpucodec/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpucodec/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpucodec/test/test_precomp.hpp b/modules/gpucodec/test/test_precomp.hpp
new file mode 100644
index 000000000..0dc79935d
--- /dev/null
+++ b/modules/gpucodec/test/test_precomp.hpp
@@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpucodec.hpp"
+#include "opencv2/highgui.hpp"
+
+#endif
diff --git a/modules/gpu/test/test_video.cpp b/modules/gpucodec/test/test_video.cpp
similarity index 75%
rename from modules/gpu/test/test_video.cpp
rename to modules/gpucodec/test/test_video.cpp
index f28cd3cf4..55fc3f87c 100644
--- a/modules/gpu/test/test_video.cpp
+++ b/modules/gpucodec/test/test_video.cpp
@@ -42,29 +42,21 @@
 
 #include "test_precomp.hpp"
 
-#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#ifdef HAVE_NVCUVID
+
+PARAM_TEST_CASE(Video, cv::gpu::DeviceInfo, std::string) // shared fixture for the Reader and Writer tests; params are (device, file name resolved under the test data "video/" directory)
+{
+};
 
 //////////////////////////////////////////////////////
 // VideoReader
 
-PARAM_TEST_CASE(VideoReader, cv::gpu::DeviceInfo, std::string)
+GPU_TEST_P(Video, Reader)
 {
-    cv::gpu::DeviceInfo devInfo;
-    std::string inputFile;
+    cv::gpu::setDevice(GET_PARAM(0).deviceID());
 
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        inputFile = GET_PARAM(1);
+    const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + inputFile;
-    }
-};
-
-GPU_TEST_P(VideoReader, Regression)
-{
     cv::gpu::VideoReader_GPU reader(inputFile);
     ASSERT_TRUE(reader.isOpened());
 
@@ -80,33 +72,17 @@ GPU_TEST_P(VideoReader, Regression)
     ASSERT_FALSE(reader.isOpened());
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, VideoReader, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
-
 //////////////////////////////////////////////////////
 // VideoWriter
 
 #ifdef WIN32
 
-PARAM_TEST_CASE(VideoWriter, cv::gpu::DeviceInfo, std::string)
+GPU_TEST_P(Video, Writer)
 {
-    cv::gpu::DeviceInfo devInfo;
-    std::string inputFile;
+    cv::gpu::setDevice(GET_PARAM(0).deviceID());
 
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        inputFile = GET_PARAM(1);
+    const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + std::string("video/") + inputFile;
-    }
-};
-
-GPU_TEST_P(VideoWriter, Regression)
-{
     std::string outputFile = cv::tempfile(".avi");
     const double FPS = 25.0;
 
@@ -144,10 +120,10 @@ GPU_TEST_P(VideoWriter, Regression)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, VideoWriter, testing::Combine(
+#endif // WIN32
+
+INSTANTIATE_TEST_CASE_P(GPU_Codec, Video, testing::Combine(
     ALL_DEVICES,
     testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
 
-#endif // WIN32
-
-#endif //  defined(HAVE_CUDA) && defined(HAVE_NVCUVID)
+#endif // HAVE_NVCUVID
diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt
index 6c6022c72..d111a79eb 100644
--- a/modules/superres/CMakeLists.txt
+++ b/modules/superres/CMakeLists.txt
@@ -4,4 +4,4 @@ endif()
 
 set(the_description "Super Resolution")
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
-ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui)
+ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui opencv_gpucodec)
diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp
index 5e6ed0a2b..cba2b14ea 100644
--- a/modules/superres/src/frame_source.cpp
+++ b/modules/superres/src/frame_source.cpp
@@ -187,7 +187,7 @@ Ptr<FrameSource> cv::superres::createFrameSource_Camera(int deviceId)
 //////////////////////////////////////////////////////
 // VideoFrameSource_GPU
 
-#ifndef HAVE_OPENCV_GPU
+#ifndef HAVE_OPENCV_GPUCODEC
 
 Ptr<FrameSource> cv::superres::createFrameSource_Video_GPU(const String& fileName)
 {
@@ -196,7 +196,7 @@ Ptr<FrameSource> cv::superres::createFrameSource_Video_GPU(const String& fileNam
     return Ptr<FrameSource>();
 }
 
-#else // HAVE_OPENCV_GPU
+#else // HAVE_OPENCV_GPUCODEC
 
 namespace
 {
@@ -250,4 +250,4 @@ Ptr<FrameSource> cv::superres::createFrameSource_Video_GPU(const String& fileNam
     return new VideoFrameSource(fileName);
 }
 
-#endif // HAVE_OPENCV_GPU
+#endif // HAVE_OPENCV_GPUCODEC
diff --git a/modules/superres/src/precomp.hpp b/modules/superres/src/precomp.hpp
index 429bd485a..960d9b71c 100644
--- a/modules/superres/src/precomp.hpp
+++ b/modules/superres/src/precomp.hpp
@@ -60,6 +60,10 @@
 #  include "opencv2/core/gpu_private.hpp"
 #endif
 
+#ifdef HAVE_OPENCV_GPUCODEC
+#  include "opencv2/gpucodec.hpp"
+#endif
+
 #ifdef HAVE_OPENCV_HIGHGUI
     #include "opencv2/highgui.hpp"
 #endif
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index ee5910630..015df939b 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -1,7 +1,8 @@
 SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc opencv_highgui
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
-                                     opencv_nonfree opencv_softcascade opencv_superres)
+                                     opencv_nonfree opencv_softcascade opencv_superres
+                                     opencv_gpucodec)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 
diff --git a/samples/gpu/video_reader.cpp b/samples/gpu/video_reader.cpp
index a4c068516..f9bbbbb33 100644
--- a/samples/gpu/video_reader.cpp
+++ b/samples/gpu/video_reader.cpp
@@ -4,11 +4,11 @@
 #include <algorithm>
 #include <numeric>
 
-#include <opencv2/core/core.hpp>
+#include <opencv2/core.hpp>
 #include <opencv2/core/opengl.hpp>
-#include <opencv2/gpu/gpu.hpp>
-#include <opencv2/highgui/highgui.hpp>
-#include <opencv2/contrib/contrib.hpp>
+#include <opencv2/gpucodec.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/contrib.hpp>
 
 int main(int argc, const char* argv[])
 {
diff --git a/samples/gpu/video_writer.cpp b/samples/gpu/video_writer.cpp
index 797656752..aed76e04e 100644
--- a/samples/gpu/video_writer.cpp
+++ b/samples/gpu/video_writer.cpp
@@ -2,10 +2,10 @@
 #include <vector>
 #include <numeric>
 
-#include "opencv2/core/core.hpp"
-#include "opencv2/gpu/gpu.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/contrib/contrib.hpp"
+#include "opencv2/core.hpp"
+#include "opencv2/gpucodec.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/contrib.hpp"
 
 int main(int argc, const char* argv[])
 {

From 31c8b527c618112aa132f2200e388bb1b8115883 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 17:39:17 +0400
Subject: [PATCH 02/49] gpuarithm module for arithmetic operations on matrices

---
 .../include/opencv2/core/cuda/emulation.hpp   |  125 +-
 modules/gpu/CMakeLists.txt                    |    6 +-
 modules/gpu/doc/gpu.rst                       |    3 -
 modules/gpu/doc/image_processing.rst          |   22 -
 modules/gpu/include/opencv2/gpu.hpp           |  239 +-
 modules/gpu/src/cuda/element_operations.cu    | 2636 -----------------
 modules/gpu/src/cuda/matrix_reductions.cu     | 1366 ---------
 modules/gpu/src/cuda/safe_call.hpp            |   10 -
 modules/gpu/src/error.cpp                     |   23 -
 modules/gpu/src/imgproc.cpp                   |   74 +
 modules/gpu/src/precomp.hpp                   |    4 -
 modules/gpuarithm/CMakeLists.txt              |   13 +
 modules/gpuarithm/doc/gpuarithm.rst           |   10 +
 .../doc/matrix_reductions.rst                 |    0
 .../doc/operations_on_matrices.rst            |    0
 .../doc/per_element_operations.rst            |   22 +
 .../gpuarithm/include/opencv2/gpuarithm.hpp   |  279 ++
 modules/{gpu => gpuarithm}/perf/perf_core.cpp |    0
 modules/gpuarithm/perf/perf_main.cpp          |   47 +
 modules/gpuarithm/perf/perf_precomp.cpp       |   43 +
 modules/gpuarithm/perf/perf_precomp.hpp       |   64 +
 modules/{gpu => gpuarithm}/src/arithm.cpp     |   86 +-
 modules/gpuarithm/src/cuda/absdiff_mat.cu     |  147 +
 modules/gpuarithm/src/cuda/absdiff_scalar.cu  |   98 +
 modules/gpuarithm/src/cuda/add_mat.cu         |  185 ++
 modules/gpuarithm/src/cuda/add_scalar.cu      |  148 +
 modules/gpuarithm/src/cuda/add_weighted.cu    |  364 +++
 .../gpuarithm/src/cuda/arithm_func_traits.hpp |  145 +
 modules/gpuarithm/src/cuda/bitwise_mat.cu     |  126 +
 modules/gpuarithm/src/cuda/bitwise_scalar.cu  |  104 +
 modules/gpuarithm/src/cuda/cmp_mat.cu         |  206 ++
 modules/gpuarithm/src/cuda/cmp_scalar.cu      |  284 ++
 modules/gpuarithm/src/cuda/countnonzero.cu    |  175 ++
 modules/gpuarithm/src/cuda/div_inv.cu         |  144 +
 modules/gpuarithm/src/cuda/div_mat.cu         |  230 ++
 modules/gpuarithm/src/cuda/div_scalar.cu      |  144 +
 modules/gpuarithm/src/cuda/math.cu            |  302 ++
 modules/gpuarithm/src/cuda/minmax.cu          |  246 ++
 modules/gpuarithm/src/cuda/minmax_mat.cu      |  228 ++
 modules/gpuarithm/src/cuda/minmaxloc.cu       |  235 ++
 modules/gpuarithm/src/cuda/mul_mat.cu         |  211 ++
 modules/gpuarithm/src/cuda/mul_scalar.cu      |  144 +
 .../src/cuda/polar_cart.cu}                   |    0
 modules/gpuarithm/src/cuda/reduce.cu          |  330 +++
 .../src/cuda/split_merge.cu                   |    0
 modules/gpuarithm/src/cuda/sub_mat.cu         |  185 ++
 modules/gpuarithm/src/cuda/sub_scalar.cu      |  148 +
 modules/gpuarithm/src/cuda/sum.cu             |  380 +++
 modules/gpuarithm/src/cuda/threshold.cu       |  114 +
 modules/gpuarithm/src/cuda/transpose.cu       |  122 +
 modules/gpuarithm/src/cuda/unroll_detail.hpp  |  135 +
 .../src/element_operations.cpp                |  218 +-
 .../src/matrix_reductions.cpp                 |    0
 modules/gpuarithm/src/precomp.cpp             |   43 +
 modules/gpuarithm/src/precomp.hpp             |   58 +
 .../{gpu => gpuarithm}/src/split_merge.cpp    |    0
 modules/{gpu => gpuarithm}/test/test_core.cpp |    0
 modules/gpuarithm/test/test_main.cpp          |  120 +
 modules/gpuarithm/test/test_precomp.cpp       |   43 +
 modules/gpuarithm/test/test_precomp.hpp       |   60 +
 modules/stitching/CMakeLists.txt              |    2 +-
 modules/superres/CMakeLists.txt               |    2 +-
 samples/cpp/CMakeLists.txt                    |    1 +
 samples/gpu/CMakeLists.txt                    |    2 +-
 64 files changed, 6425 insertions(+), 4476 deletions(-)
 delete mode 100644 modules/gpu/src/cuda/element_operations.cu
 delete mode 100644 modules/gpu/src/cuda/matrix_reductions.cu
 create mode 100644 modules/gpuarithm/CMakeLists.txt
 create mode 100644 modules/gpuarithm/doc/gpuarithm.rst
 rename modules/{gpu => gpuarithm}/doc/matrix_reductions.rst (100%)
 rename modules/{gpu => gpuarithm}/doc/operations_on_matrices.rst (100%)
 rename modules/{gpu => gpuarithm}/doc/per_element_operations.rst (95%)
 create mode 100644 modules/gpuarithm/include/opencv2/gpuarithm.hpp
 rename modules/{gpu => gpuarithm}/perf/perf_core.cpp (100%)
 create mode 100644 modules/gpuarithm/perf/perf_main.cpp
 create mode 100644 modules/gpuarithm/perf/perf_precomp.cpp
 create mode 100644 modules/gpuarithm/perf/perf_precomp.hpp
 rename modules/{gpu => gpuarithm}/src/arithm.cpp (90%)
 create mode 100644 modules/gpuarithm/src/cuda/absdiff_mat.cu
 create mode 100644 modules/gpuarithm/src/cuda/absdiff_scalar.cu
 create mode 100644 modules/gpuarithm/src/cuda/add_mat.cu
 create mode 100644 modules/gpuarithm/src/cuda/add_scalar.cu
 create mode 100644 modules/gpuarithm/src/cuda/add_weighted.cu
 create mode 100644 modules/gpuarithm/src/cuda/arithm_func_traits.hpp
 create mode 100644 modules/gpuarithm/src/cuda/bitwise_mat.cu
 create mode 100644 modules/gpuarithm/src/cuda/bitwise_scalar.cu
 create mode 100644 modules/gpuarithm/src/cuda/cmp_mat.cu
 create mode 100644 modules/gpuarithm/src/cuda/cmp_scalar.cu
 create mode 100644 modules/gpuarithm/src/cuda/countnonzero.cu
 create mode 100644 modules/gpuarithm/src/cuda/div_inv.cu
 create mode 100644 modules/gpuarithm/src/cuda/div_mat.cu
 create mode 100644 modules/gpuarithm/src/cuda/div_scalar.cu
 create mode 100644 modules/gpuarithm/src/cuda/math.cu
 create mode 100644 modules/gpuarithm/src/cuda/minmax.cu
 create mode 100644 modules/gpuarithm/src/cuda/minmax_mat.cu
 create mode 100644 modules/gpuarithm/src/cuda/minmaxloc.cu
 create mode 100644 modules/gpuarithm/src/cuda/mul_mat.cu
 create mode 100644 modules/gpuarithm/src/cuda/mul_scalar.cu
 rename modules/{gpu/src/cuda/mathfunc.cu => gpuarithm/src/cuda/polar_cart.cu} (100%)
 create mode 100644 modules/gpuarithm/src/cuda/reduce.cu
 rename modules/{gpu => gpuarithm}/src/cuda/split_merge.cu (100%)
 create mode 100644 modules/gpuarithm/src/cuda/sub_mat.cu
 create mode 100644 modules/gpuarithm/src/cuda/sub_scalar.cu
 create mode 100644 modules/gpuarithm/src/cuda/sum.cu
 create mode 100644 modules/gpuarithm/src/cuda/threshold.cu
 create mode 100644 modules/gpuarithm/src/cuda/transpose.cu
 create mode 100644 modules/gpuarithm/src/cuda/unroll_detail.hpp
 rename modules/{gpu => gpuarithm}/src/element_operations.cpp (97%)
 rename modules/{gpu => gpuarithm}/src/matrix_reductions.cpp (100%)
 create mode 100644 modules/gpuarithm/src/precomp.cpp
 create mode 100644 modules/gpuarithm/src/precomp.hpp
 rename modules/{gpu => gpuarithm}/src/split_merge.cpp (100%)
 rename modules/{gpu => gpuarithm}/test/test_core.cpp (100%)
 create mode 100644 modules/gpuarithm/test/test_main.cpp
 create mode 100644 modules/gpuarithm/test/test_precomp.cpp
 create mode 100644 modules/gpuarithm/test/test_precomp.hpp

diff --git a/modules/core/include/opencv2/core/cuda/emulation.hpp b/modules/core/include/opencv2/core/cuda/emulation.hpp
index 3df26468b..b484f2378 100644
--- a/modules/core/include/opencv2/core/cuda/emulation.hpp
+++ b/modules/core/include/opencv2/core/cuda/emulation.hpp
@@ -43,6 +43,7 @@
 #ifndef OPENCV_GPU_EMULATION_HPP_
 #define OPENCV_GPU_EMULATION_HPP_
 
+#include "common.hpp"
 #include "warp_reduce.hpp"
 
 namespace cv { namespace gpu { namespace cudev
@@ -131,8 +132,130 @@ namespace cv { namespace gpu { namespace cudev
                 return ::atomicMin(address, val);
 #endif
             }
+        }; // struct smem
+
+        struct glob // atomic add/min/max helpers with per-architecture fallbacks; presumably intended for global-memory addresses (TODO confirm)
+        {
+            static __device__ __forceinline__ int atomicAdd(int* address, int val) // thin forwarder to the CUDA built-in
+            {
+                return ::atomicAdd(address, val);
+            }
+            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val) // thin forwarder to the CUDA built-in
+            {
+                return ::atomicAdd(address, val);
+            }
+            static __device__ __forceinline__ float atomicAdd(float* address, float val) // built-in float atomicAdd when __CUDA_ARCH__ >= 200, otherwise emulated with an atomicCAS loop
+            {
+            #if __CUDA_ARCH__ >= 200
+                return ::atomicAdd(address, val);
+            #else
+                int* address_as_i = (int*) address; // work on the raw 32-bit pattern so the int overload of atomicCAS can be used
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(val + __int_as_float(assumed)));
+                } while (assumed != old); // retry until the CAS observed the value we assumed; compared as ints, not as floats
+                return __int_as_float(old); // value held before this add
+            #endif
+            }
+            static __device__ __forceinline__ double atomicAdd(double* address, double val) // atomicCAS-loop emulation when __CUDA_ARCH__ >= 130; otherwise a no-op that returns 0.0
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address; // 64-bit pattern for the unsigned long long overload of atomicCAS
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(val + __longlong_as_double(assumed)));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0; // NOTE: fallback leaves *address untouched
+            #endif
+            }
+
+            static __device__ __forceinline__ int atomicMin(int* address, int val) // thin forwarder to the CUDA built-in
+            {
+                return ::atomicMin(address, val);
+            }
+            static __device__ __forceinline__ float atomicMin(float* address, float val) // atomicCAS-loop emulation when __CUDA_ARCH__ >= 120; otherwise a no-op that returns 0.0f
+            {
+            #if __CUDA_ARCH__ >= 120
+                int* address_as_i = (int*) address;
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(::fminf(val, __int_as_float(assumed))));
+                } while (assumed != old);
+                return __int_as_float(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0f; // NOTE: fallback leaves *address untouched
+            #endif
+            }
+            static __device__ __forceinline__ double atomicMin(double* address, double val) // atomicCAS-loop emulation when __CUDA_ARCH__ >= 130; otherwise a no-op that returns 0.0
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address;
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0; // NOTE: fallback leaves *address untouched
+            #endif
+            }
+
+            static __device__ __forceinline__ int atomicMax(int* address, int val) // thin forwarder to the CUDA built-in
+            {
+                return ::atomicMax(address, val);
+            }
+            static __device__ __forceinline__ float atomicMax(float* address, float val) // atomicCAS-loop emulation when __CUDA_ARCH__ >= 120; otherwise a no-op that returns 0.0f
+            {
+            #if __CUDA_ARCH__ >= 120
+                int* address_as_i = (int*) address;
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
+                } while (assumed != old);
+                return __int_as_float(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0f; // NOTE: fallback leaves *address untouched
+            #endif
+            }
+            static __device__ __forceinline__ double atomicMax(double* address, double val) // atomicCAS-loop emulation when __CUDA_ARCH__ >= 130; otherwise a no-op that returns 0.0
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address;
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                (void) address;
+                (void) val;
+                return 0.0; // NOTE: fallback leaves *address untouched
+            #endif
+            }
         };
-    };
+    }; // struct Emulation
 }}} // namespace cv { namespace gpu { namespace cudev
 
 #endif /* OPENCV_GPU_EMULATION_HPP_ */
diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 6f2f1145e..2f884b3f9 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -3,7 +3,7 @@ if(ANDROID OR IOS)
 endif()
 
 set(the_description "GPU-accelerated Computer Vision")
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy)
+ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm)
 
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
 
@@ -58,10 +58,6 @@ if(HAVE_CUDA)
     CUDA_ADD_CUFFT_TO_TARGET(${the_module})
   endif()
 
-  if(HAVE_CUBLAS)
-    CUDA_ADD_CUBLAS_TO_TARGET(${the_module})
-  endif()
-
   install(FILES src/nvidia/NPP_staging/NPP_staging.hpp  src/nvidia/core/NCV.hpp
     DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2/${name}
     COMPONENT main)
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index b21e2abac..f17ed7079 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -8,10 +8,7 @@ gpu. GPU-accelerated Computer Vision
     introduction
     initalization_and_information
     data_structures
-    operations_on_matrices
-    per_element_operations
     image_processing
-    matrix_reductions
     object_detection
     feature_detection_and_description
     image_filtering
diff --git a/modules/gpu/doc/image_processing.rst b/modules/gpu/doc/image_processing.rst
index 7b404c832..69e500374 100644
--- a/modules/gpu/doc/image_processing.rst
+++ b/modules/gpu/doc/image_processing.rst
@@ -414,28 +414,6 @@ The methods support arbitrary permutations of the original channels, including r
 
 
 
-gpu::threshold
-------------------
-Applies a fixed-level threshold to each array element.
-
-.. ocv:function:: double gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null())
-
-    :param src: Source array (single-channel).
-
-    :param dst: Destination array with the same size and type as  ``src`` .
-
-    :param thresh: Threshold value.
-
-    :param maxval: Maximum value to use with  ``THRESH_BINARY`` and  ``THRESH_BINARY_INV`` threshold types.
-
-    :param type: Threshold type. For details, see  :ocv:func:`threshold` . The ``THRESH_OTSU`` threshold type is not supported.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`threshold`
-
-
-
 gpu::resize
 ---------------
 Resizes an image.
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 0b13fc01d..cfad81738 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -50,6 +50,7 @@
 #endif
 
 #include "opencv2/core/gpumat.hpp"
+#include "opencv2/gpuarithm.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect.hpp"
 #include "opencv2/features2d.hpp"
@@ -269,182 +270,8 @@ CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat&
 //! supports only ksize = 1 and ksize = 3
 CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
 
+////////////////////////////// Image processing //////////////////////////////
 
-////////////////////////////// Arithmetics ///////////////////////////////////
-
-//! implements generalized matrix product algorithm GEMM from BLAS
-CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
-    const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null());
-
-//! transposes the matrix
-//! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc)
-CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! reverses the order of the rows, columns or both in a matrix
-//! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth
-CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null());
-
-//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
-//! destination array will have the depth type as lut and the same channels number as source
-//! supports CV_8UC1, CV_8UC3 types
-CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! makes multi-channel array out of several single-channel arrays
-CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! makes multi-channel array out of several single-channel arrays
-CV_EXPORTS void merge(const std::vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! copies each plane of a multi-channel array to a dedicated array
-CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null());
-
-//! copies each plane of a multi-channel array to a dedicated array
-CV_EXPORTS void split(const GpuMat& src, std::vector<GpuMat>& dst, Stream& stream = Stream::Null());
-
-//! computes magnitude of complex (x(i).re, x(i).im) vector
-//! supports only CV_32FC2 type
-CV_EXPORTS void magnitude(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null());
-
-//! computes squared magnitude of complex (x(i).re, x(i).im) vector
-//! supports only CV_32FC2 type
-CV_EXPORTS void magnitudeSqr(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null());
-
-//! computes magnitude of each (x(i), y(i)) vector
-//! supports only floating-point source
-CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null());
-
-//! computes squared magnitude of each (x(i), y(i)) vector
-//! supports only floating-point source
-CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null());
-
-//! computes angle (angle(i)) of each (x(i), y(i)) vector
-//! supports only floating-point source
-CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
-
-//! converts Cartesian coordinates to polar
-//! supports only floating-point source
-CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
-
-//! converts polar coordinates to Cartesian
-//! supports only floating-point source
-CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null());
-
-//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
-CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0,
-                          int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat());
-CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b,
-                          int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf);
-
-
-//////////////////////////// Per-element operations ////////////////////////////////////
-
-//! adds one matrix to another (c = a + b)
-CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
-//! adds scalar to a matrix (c = a + s)
-CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
-
-//! subtracts one matrix from another (c = a - b)
-CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
-//! subtracts scalar from a matrix (c = a - s)
-CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
-
-//! computes element-wise weighted product of the two arrays (c = scale * a * b)
-CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
-//! weighted multiplies matrix to a scalar (c = scale * a * s)
-CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
-
-//! computes element-wise weighted quotient of the two arrays (c = a / b)
-CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
-//! computes element-wise weighted quotient of matrix and scalar (c = a / s)
-CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
-//! computes element-wise weighted reciprocal of an array (dst = scale/src2)
-CV_EXPORTS void divide(double scale, const GpuMat& b, GpuMat& c, int dtype = -1, Stream& stream = Stream::Null());
-
-//! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma)
-CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst,
-                            int dtype = -1, Stream& stream = Stream::Null());
-
-//! adds scaled array to another one (dst = alpha*src1 + src2)
-static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null())
-{
-    addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream);
-}
-
-//! computes element-wise absolute difference of two arrays (c = abs(a - b))
-CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
-//! computes element-wise absolute difference of array and scalar (c = abs(a - s))
-CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null());
-
-//! computes absolute value of each matrix element
-//! supports CV_16S and CV_32F depth
-CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! computes square of each pixel in an image
-//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
-CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! computes square root of each pixel in an image
-//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
-CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! computes exponent of each matrix element (b = e**a)
-//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
-CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
-
-//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
-//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
-CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
-
-//! computes power of each matrix element:
-//    (dst(i,j) = pow(     src(i,j) , power), if src.type() is integer
-//    (dst(i,j) = pow(fabs(src(i,j)), power), otherwise
-//! supports all, except depth == CV_64F
-CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! compares elements of two arrays (c = a <cmpop> b)
-CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null());
-CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null());
-
-//! performs per-elements bit-wise inversion
-CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
-
-//! calculates per-element bit-wise disjunction of two arrays
-CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
-//! calculates per-element bit-wise disjunction of array and scalar
-//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
-CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! calculates per-element bit-wise conjunction of two arrays
-CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
-//! calculates per-element bit-wise conjunction of array and scalar
-//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
-CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! calculates per-element bit-wise "exclusive or" operation
-CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
-//! calculates per-element bit-wise "exclusive or" of array and scalar
-//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
-CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! pixel by pixel right shift of an image by a constant value
-//! supports 1, 3 and 4 channels images with integers elements
-CV_EXPORTS void rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! pixel by pixel left shift of an image by a constant value
-//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
-CV_EXPORTS void lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! computes per-element minimum of two arrays (dst = min(src1, src2))
-CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! computes per-element minimum of array and scalar (dst = min(src1, src2))
-CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! computes per-element maximum of two arrays (dst = max(src1, src2))
-CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! computes per-element maximum of array and scalar (dst = max(src1, src2))
-CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
 
 enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
        ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
@@ -453,9 +280,6 @@ enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA
 //! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
 CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
 
-
-////////////////////////////// Image processing //////////////////////////////
-
 //! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
 //! supports only CV_32FC1 map type
 CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
@@ -521,9 +345,6 @@ CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& strea
 //! Routines for correcting image color gamma
 CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null());
 
-//! applies fixed threshold to the image
-CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
-
 //! resizes the image
 //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA
 CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
@@ -794,62 +615,6 @@ private:
     CannyBuf cannyBuf_;
 };
 
-////////////////////////////// Matrix reductions //////////////////////////////
-
-//! computes mean value and standard deviation of all or selected array elements
-//! supports only CV_8UC1 type
-CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
-//! buffered version
-CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
-
-//! computes norm of array
-//! supports NORM_INF, NORM_L1, NORM_L2
-//! supports all matrices except 64F
-CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2);
-CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf);
-CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf);
-
-//! computes norm of the difference between two arrays
-//! supports NORM_INF, NORM_L1, NORM_L2
-//! supports only CV_8UC1 type
-CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2);
-
-//! computes sum of array elements
-//! supports only single channel images
-CV_EXPORTS Scalar sum(const GpuMat& src);
-CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf);
-CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
-
-//! computes sum of array elements absolute values
-//! supports only single channel images
-CV_EXPORTS Scalar absSum(const GpuMat& src);
-CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf);
-CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
-
-//! computes squared sum of array elements
-//! supports only single channel images
-CV_EXPORTS Scalar sqrSum(const GpuMat& src);
-CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf);
-CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
-
-//! finds global minimum and maximum array elements and returns their values
-CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat());
-CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf);
-
-//! finds global minimum and maximum array elements and returns their values with locations
-CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
-                          const GpuMat& mask=GpuMat());
-CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
-                          const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf);
-
-//! counts non-zero array elements
-CV_EXPORTS int countNonZero(const GpuMat& src);
-CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf);
-
-//! reduces a matrix to a vector
-CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
-
-
 ///////////////////////////// Calibration 3D //////////////////////////////////
 
 CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu
deleted file mode 100644
index 095d8bac0..000000000
--- a/modules/gpu/src/cuda/element_operations.cu
+++ /dev/null
@@ -1,2636 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined CUDA_DISABLER
-
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/vec_math.hpp"
-#include "opencv2/core/cuda/transform.hpp"
-#include "opencv2/core/cuda/limits.hpp"
-#include "opencv2/core/cuda/saturate_cast.hpp"
-#include "opencv2/core/cuda/simd_functions.hpp"
-
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
-
-namespace arithm
-{
-    template <size_t src_size, size_t dst_size> struct ArithmFuncTraits
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 1 };
-    };
-
-    template <> struct ArithmFuncTraits<1, 1>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-    template <> struct ArithmFuncTraits<1, 2>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-    template <> struct ArithmFuncTraits<1, 4>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-
-    template <> struct ArithmFuncTraits<2, 1>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-    template <> struct ArithmFuncTraits<2, 2>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-    template <> struct ArithmFuncTraits<2, 4>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-
-    template <> struct ArithmFuncTraits<4, 1>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-    template <> struct ArithmFuncTraits<4, 2>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-    template <> struct ArithmFuncTraits<4, 4>
-    {
-        enum { simple_block_dim_x = 32 };
-        enum { simple_block_dim_y = 8 };
-
-        enum { smart_block_dim_x = 32 };
-        enum { smart_block_dim_y = 8 };
-        enum { smart_shift = 4 };
-    };
-}
-
-//////////////////////////////////////////////////////////////////////////
-// addMat
-
-namespace arithm
-{
-    struct VAdd4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vadd4(a, b);
-        }
-
-        __device__ __forceinline__ VAdd4() {}
-        __device__ __forceinline__ VAdd4(const VAdd4& other) {}
-    };
-
-    ////////////////////////////////////
-
-    struct VAdd2 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vadd2(a, b);
-        }
-
-        __device__ __forceinline__ VAdd2() {}
-        __device__ __forceinline__ VAdd2(const VAdd2& other) {}
-    };
-
-    ////////////////////////////////////
-
-    template <typename T, typename D> struct AddMat : binary_function<T, T, D>
-    {
-        __device__ __forceinline__ D operator ()(T a, T b) const
-        {
-            return saturate_cast<D>(a + b);
-        }
-
-        __device__ __forceinline__ AddMat() {}
-        __device__ __forceinline__ AddMat(const AddMat& other) {}
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <> struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <typename T, typename D> struct TransformFunctorTraits< arithm::AddMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    void addMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
-    }
-
-    void addMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
-    }
-
-    template <typename T, typename D>
-    void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
-    {
-        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
-        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
-    }
-
-    template void addMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    template void addMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// addScalar
-
-namespace arithm
-{
-    template <typename T, typename S, typename D> struct AddScalar : unary_function<T, D>
-    {
-        S val;
-
-        explicit AddScalar(S val_) : val(val_) {}
-
-        __device__ __forceinline__ D operator ()(T a) const
-        {
-            return saturate_cast<D>(a + val);
-        }
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T, typename S, typename D>
-    void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
-    {
-        AddScalar<T, S, D> op(static_cast<S>(val));
-
-        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
-        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-    }
-
-    template void addScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    template void addScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void addScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void addScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void addScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// subMat
-
-namespace arithm
-{
-    struct VSub4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vsub4(a, b);
-        }
-
-        __device__ __forceinline__ VSub4() {}
-        __device__ __forceinline__ VSub4(const VSub4& other) {}
-    };
-
-    ////////////////////////////////////
-
-    struct VSub2 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vsub2(a, b);
-        }
-
-        __device__ __forceinline__ VSub2() {}
-        __device__ __forceinline__ VSub2(const VSub2& other) {}
-    };
-
-    ////////////////////////////////////
-
-    template <typename T, typename D> struct SubMat : binary_function<T, T, D>
-    {
-        __device__ __forceinline__ D operator ()(T a, T b) const
-        {
-            return saturate_cast<D>(a - b);
-        }
-
-        __device__ __forceinline__ SubMat() {}
-        __device__ __forceinline__ SubMat(const SubMat& other) {}
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <typename T, typename D> struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
-    }
-
-    void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
-    }
-
-    template <typename T, typename D>
-    void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
-    {
-        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
-        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
-    }
-
-    template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    template void subMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// subScalar
-
-namespace arithm
-{
-    template <typename T, typename S, typename D>
-    void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
-    {
-        AddScalar<T, S, D> op(-static_cast<S>(val));
-
-        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
-        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-    }
-
-    template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    template void subScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    //template void subScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    //template void subScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void subScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// mulMat
-
-namespace arithm
-{
-    struct Mul_8uc4_32f : binary_function<uint, float, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, float b) const
-        {
-            uint res = 0;
-
-            res |= (saturate_cast<uchar>((0xffu & (a      )) * b)      );
-            res |= (saturate_cast<uchar>((0xffu & (a >>  8)) * b) <<  8);
-            res |= (saturate_cast<uchar>((0xffu & (a >> 16)) * b) << 16);
-            res |= (saturate_cast<uchar>((0xffu & (a >> 24)) * b) << 24);
-
-            return res;
-        }
-
-        __device__ __forceinline__ Mul_8uc4_32f() {}
-        __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
-    };
-
-    struct Mul_16sc4_32f : binary_function<short4, float, short4>
-    {
-        __device__ __forceinline__ short4 operator ()(short4 a, float b) const
-        {
-            return make_short4(saturate_cast<short>(a.x * b), saturate_cast<short>(a.y * b),
-                               saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
-        }
-
-        __device__ __forceinline__ Mul_16sc4_32f() {}
-        __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
-    };
-
-    template <typename T, typename D> struct Mul : binary_function<T, T, D>
-    {
-        __device__ __forceinline__ D operator ()(T a, T b) const
-        {
-            return saturate_cast<D>(a * b);
-        }
-
-        __device__ __forceinline__ Mul() {}
-        __device__ __forceinline__ Mul(const Mul& other) {}
-    };
-
-    template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
-    {
-        S scale;
-
-        explicit MulScale(S scale_) : scale(scale_) {}
-
-        __device__ __forceinline__ D operator ()(T a, T b) const
-        {
-            return saturate_cast<D>(scale * a * b);
-        }
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    template <typename T, typename D> struct TransformFunctorTraits< arithm::Mul<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-
-    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    void mulMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream);
-    }
-
-    void mulMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream);
-    }
-
-    template <typename T, typename S, typename D>
-    void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream)
-    {
-        if (scale == 1)
-        {
-            Mul<T, D> op;
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-        }
-        else
-        {
-            MulScale<T, S, D> op(static_cast<S>(scale));
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-        }
-    }
-
-    template void mulMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    template void mulMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void mulMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void mulMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void mulMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void mulMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void mulMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void mulMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void mulMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// mulScalar
-
-namespace arithm
-{
-    template <typename T, typename S, typename D> struct MulScalar : unary_function<T, D>
-    {
-        S val;
-
-        explicit MulScalar(S val_) : val(val_) {}
-
-        __device__ __forceinline__ D operator ()(T a) const
-        {
-            return saturate_cast<D>(a * val);
-        }
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T, typename S, typename D>
-    void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
-    {
-        MulScalar<T, S, D> op(static_cast<S>(val));
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-    }
-
-    template void mulScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    template void mulScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void mulScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void mulScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void mulScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void mulScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void mulScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void mulScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void mulScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// divMat
-
-namespace arithm
-{
-    struct Div_8uc4_32f : binary_function<uint, float, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, float b) const
-        {
-            uint res = 0;
-
-            if (b != 0)
-            {
-                b = 1.0f / b;
-                res |= (saturate_cast<uchar>((0xffu & (a      )) * b)      );
-                res |= (saturate_cast<uchar>((0xffu & (a >>  8)) * b) <<  8);
-                res |= (saturate_cast<uchar>((0xffu & (a >> 16)) * b) << 16);
-                res |= (saturate_cast<uchar>((0xffu & (a >> 24)) * b) << 24);
-            }
-
-            return res;
-        }
-    };
-
-    struct Div_16sc4_32f : binary_function<short4, float, short4>
-    {
-        __device__ __forceinline__ short4 operator ()(short4 a, float b) const
-        {
-            return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<short>(a.y / b),
-                                        saturate_cast<short>(a.z / b), saturate_cast<short>(a.w / b))
-                          : make_short4(0,0,0,0);
-        }
-    };
-
-    template <typename T, typename D> struct Div : binary_function<T, T, D>
-    {
-        __device__ __forceinline__ D operator ()(T a, T b) const
-        {
-            return b != 0 ? saturate_cast<D>(a / b) : 0;
-        }
-
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
-    };
-    template <typename T> struct Div<T, float> : binary_function<T, T, float>
-    {
-        __device__ __forceinline__ float operator ()(T a, T b) const
-        {
-            return b != 0 ? static_cast<float>(a) / b : 0;
-        }
-
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
-    };
-    template <typename T> struct Div<T, double> : binary_function<T, T, double>
-    {
-        __device__ __forceinline__ double operator ()(T a, T b) const
-        {
-            return b != 0 ? static_cast<double>(a) / b : 0;
-        }
-
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
-    };
-
-    template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
-    {
-        S scale;
-
-        explicit DivScale(S scale_) : scale(scale_) {}
-
-        __device__ __forceinline__ D operator ()(T a, T b) const
-        {
-            return b != 0 ? saturate_cast<D>(scale * a / b) : 0;
-        }
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    template <typename T, typename D> struct TransformFunctorTraits< arithm::Div<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-
-    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    void divMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
-    }
-
-    void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
-    }
-
-    template <typename T, typename S, typename D>
-    void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream)
-    {
-        if (scale == 1)
-        {
-            Div<T, D> op;
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-        }
-        else
-        {
-            DivScale<T, S, D> op(static_cast<S>(scale));
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-        }
-    }
-
-    template void divMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    template void divMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void divMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void divMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void divMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void divMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-
-    //template void divMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    //template void divMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-    template void divMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// divScalar
-
-namespace arithm
-{
-    template <typename T, typename S, typename D>
-    void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
-    {
-        MulScalar<T, S, D> op(static_cast<S>(1.0 / val));
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-    }
-
-    template void divScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    template void divScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// divInv
-
-namespace arithm
-{
-    template <typename T, typename S, typename D> struct DivInv : unary_function<T, D>
-    {
-        S val;
-
-        explicit DivInv(S val_) : val(val_) {}
-
-        __device__ __forceinline__ D operator ()(T a) const
-        {
-            return a != 0 ? saturate_cast<D>(val / a) : 0;
-        }
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivInv<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T, typename S, typename D>
-    void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
-    {
-        DivInv<T, S, D> op(static_cast<S>(val));
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-    }
-
-    template void divInv<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    template void divInv<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divInv<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divInv<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divInv<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divInv<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-
-    //template void divInv<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    //template void divInv<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-    template void divInv<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// absDiffMat
-
-namespace arithm
-{
-    struct VAbsDiff4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vabsdiff4(a, b);
-        }
-
-        __device__ __forceinline__ VAbsDiff4() {}
-        __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
-    };
-
-    ////////////////////////////////////
-
-    struct VAbsDiff2 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vabsdiff2(a, b);
-        }
-
-        __device__ __forceinline__ VAbsDiff2() {}
-        __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
-    };
-
-    ////////////////////////////////////
-
-    __device__ __forceinline__ int _abs(int a)
-    {
-        return ::abs(a);
-    }
-    __device__ __forceinline__ float _abs(float a)
-    {
-        return ::fabsf(a);
-    }
-    __device__ __forceinline__ double _abs(double a)
-    {
-        return ::fabs(a);
-    }
-
-    template <typename T> struct AbsDiffMat : binary_function<T, T, T>
-    {
-        __device__ __forceinline__ T operator ()(T a, T b) const
-        {
-            return saturate_cast<T>(_abs(a - b));
-        }
-
-        __device__ __forceinline__ AbsDiffMat() {}
-        __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <> struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <typename T> struct TransformFunctorTraits< arithm::AbsDiffMat<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    void absDiffMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream);
-    }
-
-    void absDiffMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
-    }
-
-    template <typename T>
-    void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream);
-    }
-
-    template void absDiffMat<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffMat<schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffMat<short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffMat<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffMat<float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// absDiffScalar
-
-namespace arithm
-{
-    template <typename T, typename S> struct AbsDiffScalar : unary_function<T, T>
-    {
-        S val;
-
-        explicit AbsDiffScalar(S val_) : val(val_) {}
-
-        __device__ __forceinline__ T operator ()(T a) const
-        {
-            abs_func<S> f;
-            return saturate_cast<T>(f(a - val));
-        }
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T, typename S>
-    void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
-    {
-        AbsDiffScalar<T, S> op(static_cast<S>(val));
-
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream);
-    }
-
-    template void absDiffScalar<uchar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffScalar<schar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffScalar<ushort, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffScalar<short, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffScalar<int, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffScalar<float, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void absDiffScalar<double, double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// absMat
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T>
-    void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream);
-    }
-
-    template void absMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void absMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// sqrMat
-
-namespace arithm
-{
-    template <typename T> struct Sqr : unary_function<T, T>
-    {
-        __device__ __forceinline__ T operator ()(T x) const
-        {
-            return saturate_cast<T>(x * x);
-        }
-
-        __device__ __forceinline__ Sqr() {}
-        __device__ __forceinline__ Sqr(const Sqr& other) {}
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T>
-    void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream);
-    }
-
-    template void sqrMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// sqrtMat
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T>
-    void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream);
-    }
-
-    template void sqrtMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void sqrtMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// logMat
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T>
-    void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream);
-    }
-
-    template void logMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void logMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// expMat
-
-namespace arithm
-{
-    template <typename T> struct Exp : unary_function<T, T>
-    {
-        __device__ __forceinline__ T operator ()(T x) const
-        {
-            exp_func<T> f;
-            return saturate_cast<T>(f(x));
-        }
-
-        __device__ __forceinline__ Exp() {}
-        __device__ __forceinline__ Exp(const Exp& other) {}
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T>
-    void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream);
-    }
-
-    template void expMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    template void expMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////////////////
-// cmpMat
-
-namespace arithm
-{
-    struct VCmpEq4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vcmpeq4(a, b);
-        }
-
-        __device__ __forceinline__ VCmpEq4() {}
-        __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
-    };
-    struct VCmpNe4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vcmpne4(a, b);
-        }
-
-        __device__ __forceinline__ VCmpNe4() {}
-        __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
-    };
-    struct VCmpLt4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vcmplt4(a, b);
-        }
-
-        __device__ __forceinline__ VCmpLt4() {}
-        __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
-    };
-    struct VCmpLe4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vcmple4(a, b);
-        }
-
-        __device__ __forceinline__ VCmpLe4() {}
-        __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
-    };
-
-    ////////////////////////////////////
-
-    template <class Op, typename T>
-    struct Cmp : binary_function<T, T, uchar>
-    {
-        __device__ __forceinline__ uchar operator()(T a, T b) const
-        {
-            Op op;
-            return -op(a, b);
-        }
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-    template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-    template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-    template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <class Op, typename T> struct TransformFunctorTraits< arithm::Cmp<Op, T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
-    }
-    void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
-    }
-    void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
-    }
-    void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
-    }
-
-    template <template <typename> class Op, typename T>
-    void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        Cmp<Op<T>, T> op;
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
-    }
-
-    template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cmpMat<equal_to, T>(src1, src2, dst, stream);
-    }
-    template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cmpMat<not_equal_to, T>(src1, src2, dst, stream);
-    }
-    template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cmpMat<less, T>(src1, src2, dst, stream);
-    }
-    template <typename T> void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cmpMat<less_equal, T>(src1, src2, dst, stream);
-    }
-
-    template void cmpMatEq<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatEq<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatEq<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatEq<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatEq<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatEq<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatEq<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-
-    template void cmpMatNe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatNe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatNe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatNe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatNe<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatNe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatNe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-
-    template void cmpMatLt<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLt<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLt<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLt<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLt<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLt<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLt<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-
-    template void cmpMatLe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLe<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void cmpMatLe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////////////////
-// cmpScalar
-
-namespace arithm
-{
-#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type
-
-    template <class Op, typename T, int cn> struct CmpScalar;
-    template <class Op, typename T>
-    struct CmpScalar<Op, T, 1> : unary_function<T, uchar>
-    {
-        const T val;
-
-        __host__ explicit CmpScalar(T val_) : val(val_) {}
-
-        __device__ __forceinline__ uchar operator()(T src) const
-        {
-            Cmp<Op, T> op;
-            return op(src, val);
-        }
-    };
-    template <class Op, typename T>
-    struct CmpScalar<Op, T, 2> : unary_function<TYPE_VEC(T, 2), TYPE_VEC(uchar, 2)>
-    {
-        const TYPE_VEC(T, 2) val;
-
-        __host__ explicit CmpScalar(TYPE_VEC(T, 2) val_) : val(val_) {}
-
-        __device__ __forceinline__ TYPE_VEC(uchar, 2) operator()(const TYPE_VEC(T, 2) & src) const
-        {
-            Cmp<Op, T> op;
-            return VecTraits<TYPE_VEC(uchar, 2)>::make(op(src.x, val.x), op(src.y, val.y));
-        }
-    };
-    template <class Op, typename T>
-    struct CmpScalar<Op, T, 3> : unary_function<TYPE_VEC(T, 3), TYPE_VEC(uchar, 3)>
-    {
-        const TYPE_VEC(T, 3) val;
-
-        __host__ explicit CmpScalar(TYPE_VEC(T, 3) val_) : val(val_) {}
-
-        __device__ __forceinline__ TYPE_VEC(uchar, 3) operator()(const TYPE_VEC(T, 3) & src) const
-        {
-            Cmp<Op, T> op;
-            return VecTraits<TYPE_VEC(uchar, 3)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z));
-        }
-    };
-    template <class Op, typename T>
-    struct CmpScalar<Op, T, 4> : unary_function<TYPE_VEC(T, 4), TYPE_VEC(uchar, 4)>
-    {
-        const TYPE_VEC(T, 4) val;
-
-        __host__ explicit CmpScalar(TYPE_VEC(T, 4) val_) : val(val_) {}
-
-        __device__ __forceinline__ TYPE_VEC(uchar, 4) operator()(const TYPE_VEC(T, 4) & src) const
-        {
-            Cmp<Op, T> op;
-            return VecTraits<TYPE_VEC(uchar, 4)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w));
-        }
-    };
-
-#undef TYPE_VEC
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <template <typename> class Op, typename T, int cn>
-    void cmpScalar(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream)
-    {
-        typedef typename TypeVec<T, cn>::vec_type src_t;
-        typedef typename TypeVec<uchar, cn>::vec_type dst_t;
-
-        T sval[] = {static_cast<T>(val[0]), static_cast<T>(val[1]), static_cast<T>(val[2]), static_cast<T>(val[3])};
-        src_t val1 = VecTraits<src_t>::make(sval);
-
-        CmpScalar<Op<T>, T, cn> op(val1);
-        cudev::transform((PtrStepSz<src_t>) src, (PtrStepSz<dst_t>) dst, op, WithOutMask(), stream);
-    }
-
-    template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            0,
-            cmpScalar<equal_to, T, 1>,
-            cmpScalar<equal_to, T, 2>,
-            cmpScalar<equal_to, T, 3>,
-            cmpScalar<equal_to, T, 4>
-        };
-
-        funcs[cn](src, val, dst, stream);
-    }
-    template <typename T> void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            0,
-            cmpScalar<not_equal_to, T, 1>,
-            cmpScalar<not_equal_to, T, 2>,
-            cmpScalar<not_equal_to, T, 3>,
-            cmpScalar<not_equal_to, T, 4>
-        };
-
-        funcs[cn](src, val, dst, stream);
-    }
-    template <typename T> void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            0,
-            cmpScalar<less, T, 1>,
-            cmpScalar<less, T, 2>,
-            cmpScalar<less, T, 3>,
-            cmpScalar<less, T, 4>
-        };
-
-        funcs[cn](src, val, dst, stream);
-    }
-    template <typename T> void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            0,
-            cmpScalar<less_equal, T, 1>,
-            cmpScalar<less_equal, T, 2>,
-            cmpScalar<less_equal, T, 3>,
-            cmpScalar<less_equal, T, 4>
-        };
-
-        funcs[cn](src, val, dst, stream);
-    }
-    template <typename T> void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            0,
-            cmpScalar<greater, T, 1>,
-            cmpScalar<greater, T, 2>,
-            cmpScalar<greater, T, 3>,
-            cmpScalar<greater, T, 4>
-        };
-
-        funcs[cn](src, val, dst, stream);
-    }
-    template <typename T> void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            0,
-            cmpScalar<greater_equal, T, 1>,
-            cmpScalar<greater_equal, T, 2>,
-            cmpScalar<greater_equal, T, 3>,
-            cmpScalar<greater_equal, T, 4>
-        };
-
-        funcs[cn](src, val, dst, stream);
-    }
-
-    template void cmpScalarEq<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarEq<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarEq<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarEq<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarEq<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarEq<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarEq<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-
-    template void cmpScalarNe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarNe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarNe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarNe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarNe<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarNe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarNe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-
-    template void cmpScalarLt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLt<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-
-    template void cmpScalarLe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLe<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarLe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-
-    template void cmpScalarGt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGt<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-
-    template void cmpScalarGe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGe<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-    template void cmpScalarGe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////////////////
-// bitMat
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< bit_and<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< bit_or<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< bit_xor<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
-    {
-        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream);
-        else
-            cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
-    }
-
-    template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
-    {
-        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
-        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
-    }
-
-    template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
-    {
-        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
-        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
-    }
-
-    template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
-    {
-        if (mask.data)
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream);
-        else
-            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
-    }
-
-    template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
-    template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////////////////
-// bitScalar
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
-    }
-
-    template <typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
-    }
-
-    template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
-    }
-
-    template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-
-    template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-
-    template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// min
-
-namespace arithm
-{
-    struct VMin4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vmin4(a, b);
-        }
-
-        __device__ __forceinline__ VMin4() {}
-        __device__ __forceinline__ VMin4(const VMin4& other) {}
-    };
-
-    ////////////////////////////////////
-
-    struct VMin2 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vmin2(a, b);
-        }
-
-        __device__ __forceinline__ VMin2() {}
-        __device__ __forceinline__ VMin2(const VMin2& other) {}
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <> struct TransformFunctorTraits< arithm::VMin4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <> struct TransformFunctorTraits< arithm::VMin2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <typename T> struct TransformFunctorTraits< minimum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    void minMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VMin4(), WithOutMask(), stream);
-    }
-
-    void minMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VMin2(), WithOutMask(), stream);
-    }
-
-    template <typename T> void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, minimum<T>(), WithOutMask(), stream);
-    }
-
-    template void minMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minMat<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-
-    template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
-    }
-
-    template void minScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minScalar<int   >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void minScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// max
-
-namespace arithm
-{
-    struct VMax4 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vmax4(a, b);
-        }
-
-        __device__ __forceinline__ VMax4() {}
-        __device__ __forceinline__ VMax4(const VMax4& other) {}
-    };
-
-    ////////////////////////////////////
-
-    struct VMax2 : binary_function<uint, uint, uint>
-    {
-        __device__ __forceinline__ uint operator ()(uint a, uint b) const
-        {
-            return vmax2(a, b);
-        }
-
-        __device__ __forceinline__ VMax2() {}
-        __device__ __forceinline__ VMax2(const VMax2& other) {}
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <> struct TransformFunctorTraits< arithm::VMax4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <> struct TransformFunctorTraits< arithm::VMax2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
-    {
-    };
-
-    ////////////////////////////////////
-
-    template <typename T> struct TransformFunctorTraits< maximum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    void maxMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VMax4(), WithOutMask(), stream);
-    }
-
-    void maxMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
-    {
-        cudev::transform(src1, src2, dst, VMax2(), WithOutMask(), stream);
-    }
-
-    template <typename T> void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, maximum<T>(), WithOutMask(), stream);
-    }
-
-    template void maxMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxMat<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-
-    template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
-    }
-
-    template void maxScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxScalar<int   >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    template void maxScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// threshold
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-
-    template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <template <typename> class Op, typename T>
-    void threshold_caller(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream)
-    {
-        Op<T> op(thresh, maxVal);
-        cudev::transform(src, dst, op, WithOutMask(), stream);
-    }
-
-    template <typename T>
-    void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream)
-    {
-        typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream);
-
-        static const caller_t callers[] =
-        {
-            threshold_caller<thresh_binary_func, T>,
-            threshold_caller<thresh_binary_inv_func, T>,
-            threshold_caller<thresh_trunc_func, T>,
-            threshold_caller<thresh_to_zero_func, T>,
-            threshold_caller<thresh_to_zero_inv_func, T>
-        };
-
-        callers[type]((PtrStepSz<T>) src, (PtrStepSz<T>) dst, static_cast<T>(thresh), static_cast<T>(maxVal), stream);
-    }
-
-    template void threshold<uchar>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-    template void threshold<schar>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-    template void threshold<ushort>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-    template void threshold<short>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-    template void threshold<int>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-    template void threshold<float>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-    template void threshold<double>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// pow
-
-namespace arithm
-{
-    template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
-    {
-        float power;
-
-        PowOp(double power_) : power(static_cast<float>(power_)) {}
-
-        __device__ __forceinline__ T operator()(T e) const
-        {
-            return saturate_cast<T>(__powf((float)e, power));
-        }
-    };
-    template<typename T> struct PowOp<T, true> : unary_function<T, T>
-    {
-        float power;
-
-        PowOp(double power_) : power(static_cast<float>(power_)) {}
-
-        __device__ __forceinline__ T operator()(T e) const
-        {
-            T res = saturate_cast<T>(__powf((float)e, power));
-
-            if ((e < 0) && (1 & static_cast<int>(power)))
-                res *= -1;
-
-            return res;
-        }
-    };
-    template<> struct PowOp<float> : unary_function<float, float>
-    {
-        const float power;
-
-        PowOp(double power_) : power(static_cast<float>(power_)) {}
-
-        __device__ __forceinline__ float operator()(float e) const
-        {
-            return __powf(::fabs(e), power);
-        }
-    };
-    template<> struct PowOp<double> : unary_function<double, double>
-    {
-        double power;
-
-        PowOp(double power_) : power(power_) {}
-
-        __device__ __forceinline__ double operator()(double e) const
-        {
-            return ::pow(::fabs(e), power);
-        }
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template<typename T>
-    void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream)
-    {
-        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, PowOp<T>(power), WithOutMask(), stream);
-    }
-
-    template void pow<uchar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<schar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<short>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<ushort>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<int>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<float>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-    template void pow<double>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
-}
-
-//////////////////////////////////////////////////////////////////////////
-// addWeighted
-
-namespace arithm
-{
-    template <typename T> struct UseDouble_
-    {
-        enum {value = 0};
-    };
-    template <> struct UseDouble_<double>
-    {
-        enum {value = 1};
-    };
-    template <typename T1, typename T2, typename D> struct UseDouble
-    {
-        enum {value = (UseDouble_<T1>::value || UseDouble_<T2>::value || UseDouble_<D>::value)};
-    };
-
-    template <typename T1, typename T2, typename D, bool useDouble> struct AddWeighted_;
-    template <typename T1, typename T2, typename D> struct AddWeighted_<T1, T2, D, false> : binary_function<T1, T2, D>
-    {
-        float alpha;
-        float beta;
-        float gamma;
-
-        AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
-
-        __device__ __forceinline__ D operator ()(T1 a, T2 b) const
-        {
-            return saturate_cast<D>(a * alpha + b * beta + gamma);
-        }
-    };
-    template <typename T1, typename T2, typename D> struct AddWeighted_<T1, T2, D, true> : binary_function<T1, T2, D>
-    {
-        double alpha;
-        double beta;
-        double gamma;
-
-        AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
-
-        __device__ __forceinline__ D operator ()(T1 a, T2 b) const
-        {
-            return saturate_cast<D>(a * alpha + b * beta + gamma);
-        }
-    };
-    template <typename T1, typename T2, typename D> struct AddWeighted : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>
-    {
-        AddWeighted(double alpha_, double beta_, double gamma_) : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>(alpha_, beta_, gamma_) {}
-    };
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> >
-    {
-    };
-    template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size> struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : arithm::ArithmFuncTraits<src_size, dst_size>
-    {
-    };
-
-    template <typename T1, typename T2, typename D> struct TransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
-    template <typename T1, typename T2, typename D>
-    void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream)
-    {
-        AddWeighted<T1, T2, D> op(alpha, beta, gamma);
-
-        cudev::transform((PtrStepSz<T1>) src1, (PtrStepSz<T2>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-    }
-
-    template void addWeighted<uchar, uchar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-
-
-    template void addWeighted<schar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-
-
-    template void addWeighted<ushort, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<ushort, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<ushort, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<ushort, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<ushort, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-
-
-    template void addWeighted<short, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<short, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<short, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<short, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-
-
-    template void addWeighted<int, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<int, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<int, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-
-
-    template void addWeighted<float, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<float, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-
-
-    template void addWeighted<double, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-}
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu
deleted file mode 100644
index 8eac19513..000000000
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ /dev/null
@@ -1,1366 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if !defined CUDA_DISABLER
-
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/limits.hpp"
-#include "opencv2/core/cuda/saturate_cast.hpp"
-#include "opencv2/core/cuda/vec_traits.hpp"
-#include "opencv2/core/cuda/vec_math.hpp"
-#include "opencv2/core/cuda/reduce.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/utility.hpp"
-#include "opencv2/core/cuda/type_traits.hpp"
-
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
-
-namespace detail
-{
-    __device__ __forceinline__ int cvAtomicAdd(int* address, int val)
-    {
-        return ::atomicAdd(address, val);
-    }
-    __device__ __forceinline__ unsigned int cvAtomicAdd(unsigned int* address, unsigned int val)
-    {
-        return ::atomicAdd(address, val);
-    }
-    __device__ __forceinline__ float cvAtomicAdd(float* address, float val)
-    {
-    #if __CUDA_ARCH__ >= 200
-        return ::atomicAdd(address, val);
-    #else
-        int* address_as_i = (int*) address;
-        int old = *address_as_i, assumed;
-        do {
-            assumed = old;
-            old = ::atomicCAS(address_as_i, assumed,
-                __float_as_int(val + __int_as_float(assumed)));
-        } while (assumed != old);
-        return __int_as_float(old);
-    #endif
-    }
-    __device__ __forceinline__ double cvAtomicAdd(double* address, double val)
-    {
-    #if __CUDA_ARCH__ >= 130
-        unsigned long long int* address_as_ull = (unsigned long long int*) address;
-        unsigned long long int old = *address_as_ull, assumed;
-        do {
-            assumed = old;
-            old = ::atomicCAS(address_as_ull, assumed,
-                __double_as_longlong(val + __longlong_as_double(assumed)));
-        } while (assumed != old);
-        return __longlong_as_double(old);
-    #else
-        (void) address;
-        (void) val;
-        return 0.0;
-    #endif
-    }
-
-    __device__ __forceinline__ int cvAtomicMin(int* address, int val)
-    {
-        return ::atomicMin(address, val);
-    }
-    __device__ __forceinline__ float cvAtomicMin(float* address, float val)
-    {
-    #if __CUDA_ARCH__ >= 120
-        int* address_as_i = (int*) address;
-        int old = *address_as_i, assumed;
-        do {
-            assumed = old;
-            old = ::atomicCAS(address_as_i, assumed,
-                __float_as_int(::fminf(val, __int_as_float(assumed))));
-        } while (assumed != old);
-        return __int_as_float(old);
-    #else
-        (void) address;
-        (void) val;
-        return 0.0f;
-    #endif
-    }
-    __device__ __forceinline__ double cvAtomicMin(double* address, double val)
-    {
-    #if __CUDA_ARCH__ >= 130
-        unsigned long long int* address_as_ull = (unsigned long long int*) address;
-        unsigned long long int old = *address_as_ull, assumed;
-        do {
-            assumed = old;
-            old = ::atomicCAS(address_as_ull, assumed,
-                __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
-        } while (assumed != old);
-        return __longlong_as_double(old);
-    #else
-        (void) address;
-        (void) val;
-        return 0.0;
-    #endif
-    }
-
-    __device__ __forceinline__ int cvAtomicMax(int* address, int val)
-    {
-        return ::atomicMax(address, val);
-    }
-    __device__ __forceinline__ float cvAtomicMax(float* address, float val)
-    {
-    #if __CUDA_ARCH__ >= 120
-        int* address_as_i = (int*) address;
-        int old = *address_as_i, assumed;
-        do {
-            assumed = old;
-            old = ::atomicCAS(address_as_i, assumed,
-                __float_as_int(::fmaxf(val, __int_as_float(assumed))));
-        } while (assumed != old);
-        return __int_as_float(old);
-    #else
-        (void) address;
-        (void) val;
-        return 0.0f;
-    #endif
-    }
-    __device__ __forceinline__ double cvAtomicMax(double* address, double val)
-    {
-    #if __CUDA_ARCH__ >= 130
-        unsigned long long int* address_as_ull = (unsigned long long int*) address;
-        unsigned long long int old = *address_as_ull, assumed;
-        do {
-            assumed = old;
-            old = ::atomicCAS(address_as_ull, assumed,
-                __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
-        } while (assumed != old);
-        return __longlong_as_double(old);
-    #else
-        (void) address;
-        (void) val;
-        return 0.0;
-    #endif
-    }
-}
-
-namespace detail
-{
-    template <int cn> struct Unroll;
-    template <> struct Unroll<1>
-    {
-        template <int BLOCK_SIZE, typename R>
-        static __device__ __forceinline__ volatile R* smem_tuple(R* smem)
-        {
-            return smem;
-        }
-
-        template <typename R>
-        static __device__ __forceinline__ R& tie(R& val)
-        {
-            return val;
-        }
-
-        template <class Op>
-        static __device__ __forceinline__ const Op& op(const Op& op)
-        {
-            return op;
-        }
-    };
-    template <> struct Unroll<2>
-    {
-        template <int BLOCK_SIZE, typename R>
-        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*> smem_tuple(R* smem)
-        {
-            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
-        }
-
-        template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename VecTraits<R>::elem_type&, typename VecTraits<R>::elem_type&> tie(R& val)
-        {
-            return thrust::tie(val.x, val.y);
-        }
-
-        template <class Op>
-        static __device__ __forceinline__ const thrust::tuple<Op, Op> op(const Op& op)
-        {
-            return thrust::make_tuple(op, op);
-        }
-    };
-    template <> struct Unroll<3>
-    {
-        template <int BLOCK_SIZE, typename R>
-        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
-        {
-            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
-        }
-
-        template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename VecTraits<R>::elem_type&, typename VecTraits<R>::elem_type&, typename VecTraits<R>::elem_type&> tie(R& val)
-        {
-            return thrust::tie(val.x, val.y, val.z);
-        }
-
-        template <class Op>
-        static __device__ __forceinline__ const thrust::tuple<Op, Op, Op> op(const Op& op)
-        {
-            return thrust::make_tuple(op, op, op);
-        }
-    };
-    template <> struct Unroll<4>
-    {
-        template <int BLOCK_SIZE, typename R>
-        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
-        {
-            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
-        }
-
-        template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename VecTraits<R>::elem_type&, typename VecTraits<R>::elem_type&, typename VecTraits<R>::elem_type&, typename VecTraits<R>::elem_type&> tie(R& val)
-        {
-            return thrust::tie(val.x, val.y, val.z, val.w);
-        }
-
-        template <class Op>
-        static __device__ __forceinline__ const thrust::tuple<Op, Op, Op, Op> op(const Op& op)
-        {
-            return thrust::make_tuple(op, op, op, op);
-        }
-    };
-}
-
-/////////////////////////////////////////////////////////////
-// sum
-
-namespace sum
-{
-    __device__ unsigned int blocks_finished = 0;
-
-    template <typename R, int cn> struct AtomicAdd;
-    template <typename R> struct AtomicAdd<R, 1>
-    {
-        static __device__ void run(R* ptr, R val)
-        {
-            detail::cvAtomicAdd(ptr, val);
-        }
-    };
-    template <typename R> struct AtomicAdd<R, 2>
-    {
-        typedef typename TypeVec<R, 2>::vec_type val_type;
-
-        static __device__ void run(R* ptr, val_type val)
-        {
-            detail::cvAtomicAdd(ptr, val.x);
-            detail::cvAtomicAdd(ptr + 1, val.y);
-        }
-    };
-    template <typename R> struct AtomicAdd<R, 3>
-    {
-        typedef typename TypeVec<R, 3>::vec_type val_type;
-
-        static __device__ void run(R* ptr, val_type val)
-        {
-            detail::cvAtomicAdd(ptr, val.x);
-            detail::cvAtomicAdd(ptr + 1, val.y);
-            detail::cvAtomicAdd(ptr + 2, val.z);
-        }
-    };
-    template <typename R> struct AtomicAdd<R, 4>
-    {
-        typedef typename TypeVec<R, 4>::vec_type val_type;
-
-        static __device__ void run(R* ptr, val_type val)
-        {
-            detail::cvAtomicAdd(ptr, val.x);
-            detail::cvAtomicAdd(ptr + 1, val.y);
-            detail::cvAtomicAdd(ptr + 2, val.z);
-            detail::cvAtomicAdd(ptr + 3, val.w);
-        }
-    };
-
-    template <int BLOCK_SIZE, typename R, int cn>
-    struct GlobalReduce
-    {
-        typedef typename TypeVec<R, cn>::vec_type result_type;
-
-        static __device__ void run(result_type& sum, result_type* result, int tid, int bid, R* smem)
-        {
-        #if __CUDA_ARCH__ >= 200
-            if (tid == 0)
-                AtomicAdd<R, cn>::run((R*) result, sum);
-        #else
-            __shared__ bool is_last;
-
-            if (tid == 0)
-            {
-                result[bid] = sum;
-
-                __threadfence();
-
-                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
-                is_last = (ticket == gridDim.x * gridDim.y - 1);
-            }
-
-            __syncthreads();
-
-            if (is_last)
-            {
-                sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
-
-                cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
-
-                if (tid == 0)
-                {
-                    result[0] = sum;
-                    blocks_finished = 0;
-                }
-            }
-        #endif
-        }
-    };
-
-    template <int BLOCK_SIZE, typename src_type, typename result_type, class Mask, class Op>
-    __global__ void kernel(const PtrStepSz<src_type> src, result_type* result, const Mask mask, const Op op, const int twidth, const int theight)
-    {
-        typedef typename VecTraits<src_type>::elem_type T;
-        typedef typename VecTraits<result_type>::elem_type R;
-        const int cn = VecTraits<src_type>::cn;
-
-        __shared__ R smem[BLOCK_SIZE * cn];
-
-        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
-        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
-
-        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
-        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
-
-        result_type sum = VecTraits<result_type>::all(0);
-
-        for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
-        {
-            const src_type* ptr = src.ptr(y);
-
-            for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
-            {
-                if (mask(y, x))
-                {
-                    const src_type srcVal = ptr[x];
-                    sum = sum + op(saturate_cast<result_type>(srcVal));
-                }
-            }
-        }
-
-        cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
-
-        GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem);
-    }
-
-    const int threads_x = 32;
-    const int threads_y = 8;
-
-    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
-    {
-        block = dim3(threads_x, threads_y);
-
-        grid = dim3(divUp(cols, block.x * block.y),
-                    divUp(rows, block.y * block.x));
-
-        grid.x = ::min(grid.x, block.x);
-        grid.y = ::min(grid.y, block.y);
-    }
-
-    void getBufSize(int cols, int rows, int cn, int& bufcols, int& bufrows)
-    {
-        dim3 block, grid;
-        getLaunchCfg(cols, rows, block, grid);
-
-        bufcols = grid.x * grid.y * sizeof(double) * cn;
-        bufrows = 1;
-    }
-
-    template <typename T, typename R, int cn, template <typename> class Op>
-    void caller(PtrStepSzb src_, void* buf_, double* out, PtrStepSzb mask)
-    {
-        typedef typename TypeVec<T, cn>::vec_type src_type;
-        typedef typename TypeVec<R, cn>::vec_type result_type;
-
-        PtrStepSz<src_type> src(src_);
-        result_type* buf = (result_type*) buf_;
-
-        dim3 block, grid;
-        getLaunchCfg(src.cols, src.rows, block, grid);
-
-        const int twidth = divUp(divUp(src.cols, grid.x), block.x);
-        const int theight = divUp(divUp(src.rows, grid.y), block.y);
-
-        Op<result_type> op;
-
-        if (mask.data)
-            kernel<threads_x * threads_y><<<grid, block>>>(src, buf, SingleMask(mask), op, twidth, theight);
-        else
-            kernel<threads_x * threads_y><<<grid, block>>>(src, buf, WithOutMask(), op, twidth, theight);
-        cudaSafeCall( cudaGetLastError() );
-
-        cudaSafeCall( cudaDeviceSynchronize() );
-
-        R result[4] = {0, 0, 0, 0};
-        cudaSafeCall( cudaMemcpy(&result, buf, sizeof(result_type), cudaMemcpyDeviceToHost) );
-
-        out[0] = result[0];
-        out[1] = result[1];
-        out[2] = result[2];
-        out[3] = result[3];
-    }
-
-    template <typename T> struct SumType;
-    template <> struct SumType<uchar> { typedef unsigned int R; };
-    template <> struct SumType<schar> { typedef int R; };
-    template <> struct SumType<ushort> { typedef unsigned int R; };
-    template <> struct SumType<short> { typedef int R; };
-    template <> struct SumType<int> { typedef int R; };
-    template <> struct SumType<float> { typedef float R; };
-    template <> struct SumType<double> { typedef double R; };
-
-    template <typename T, int cn>
-    void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
-    {
-        typedef typename SumType<T>::R R;
-        caller<T, R, cn, identity>(src, buf, out, mask);
-    }
-
-    template void run<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void run<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void run<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void run<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void run<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void run<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void run<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void run<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template <typename T, int cn>
-    void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
-    {
-        typedef typename SumType<T>::R R;
-        caller<T, R, cn, abs_func>(src, buf, out, mask);
-    }
-
-    template void runAbs<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runAbs<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runAbs<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runAbs<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runAbs<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runAbs<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runAbs<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runAbs<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template <typename T> struct Sqr : unary_function<T, T>
-    {
-        __device__ __forceinline__ T operator ()(T x) const
-        {
-            return x * x;
-        }
-    };
-
-    template <typename T, int cn>
-    void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
-    {
-        caller<T, double, cn, Sqr>(src, buf, out, mask);
-    }
-
-    template void runSqr<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runSqr<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runSqr<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runSqr<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runSqr<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runSqr<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-
-    template void runSqr<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-    template void runSqr<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
-}
-
-/////////////////////////////////////////////////////////////
-// minMax
-
-namespace minMax
-{
-    __device__ unsigned int blocks_finished = 0;
-
-    // To avoid shared bank conflicts we convert each value into value of
-    // appropriate type (32 bits minimum)
-    template <typename T> struct MinMaxTypeTraits;
-    template <> struct MinMaxTypeTraits<uchar> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<schar> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<ushort> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
-    template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
-
-    template <int BLOCK_SIZE, typename R>
-    struct GlobalReduce
-    {
-        static __device__ void run(R& mymin, R& mymax, R* minval, R* maxval, int tid, int bid, R* sminval, R* smaxval)
-        {
-        #if __CUDA_ARCH__ >= 200
-            if (tid == 0)
-            {
-                detail::cvAtomicMin(minval, mymin);
-                detail::cvAtomicMax(maxval, mymax);
-            }
-        #else
-            __shared__ bool is_last;
-
-            if (tid == 0)
-            {
-                minval[bid] = mymin;
-                maxval[bid] = mymax;
-
-                __threadfence();
-
-                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
-                is_last = (ticket == gridDim.x * gridDim.y - 1);
-            }
-
-            __syncthreads();
-
-            if (is_last)
-            {
-                int idx = ::min(tid, gridDim.x * gridDim.y - 1);
-
-                mymin = minval[idx];
-                mymax = maxval[idx];
-
-                const minimum<R> minOp;
-                const maximum<R> maxOp;
-                cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
-
-                if (tid == 0)
-                {
-                    minval[0] = mymin;
-                    maxval[0] = mymax;
-
-                    blocks_finished = 0;
-                }
-            }
-        #endif
-        }
-    };
-
-    template <int BLOCK_SIZE, typename T, typename R, class Mask>
-    __global__ void kernel(const PtrStepSz<T> src, const Mask mask, R* minval, R* maxval, const int twidth, const int theight)
-    {
-        __shared__ R sminval[BLOCK_SIZE];
-        __shared__ R smaxval[BLOCK_SIZE];
-
-        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
-        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
-
-        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
-        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
-
-        R mymin = numeric_limits<R>::max();
-        R mymax = -numeric_limits<R>::max();
-
-        const minimum<R> minOp;
-        const maximum<R> maxOp;
-
-        for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
-        {
-            const T* ptr = src.ptr(y);
-
-            for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
-            {
-                if (mask(y, x))
-                {
-                    const R srcVal = ptr[x];
-
-                    mymin = minOp(mymin, srcVal);
-                    mymax = maxOp(mymax, srcVal);
-                }
-            }
-        }
-
-        cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
-
-        GlobalReduce<BLOCK_SIZE, R>::run(mymin, mymax, minval, maxval, tid, bid, sminval, smaxval);
-    }
-
-    const int threads_x = 32;
-    const int threads_y = 8;
-
-    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
-    {
-        block = dim3(threads_x, threads_y);
-
-        grid = dim3(divUp(cols, block.x * block.y),
-                    divUp(rows, block.y * block.x));
-
-        grid.x = ::min(grid.x, block.x);
-        grid.y = ::min(grid.y, block.y);
-    }
-
-    void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
-    {
-        dim3 block, grid;
-        getLaunchCfg(cols, rows, block, grid);
-
-        bufcols = grid.x * grid.y * sizeof(double);
-        bufrows = 2;
-    }
-
-    __global__ void setDefaultKernel(int* minval_buf, int* maxval_buf)
-    {
-        *minval_buf = numeric_limits<int>::max();
-        *maxval_buf = numeric_limits<int>::min();
-    }
-    __global__ void setDefaultKernel(float* minval_buf, float* maxval_buf)
-    {
-        *minval_buf = numeric_limits<float>::max();
-        *maxval_buf = -numeric_limits<float>::max();
-    }
-    __global__ void setDefaultKernel(double* minval_buf, double* maxval_buf)
-    {
-        *minval_buf = numeric_limits<double>::max();
-        *maxval_buf = -numeric_limits<double>::max();
-    }
-
-    template <typename R>
-    void setDefault(R* minval_buf, R* maxval_buf)
-    {
-        setDefaultKernel<<<1, 1>>>(minval_buf, maxval_buf);
-    }
-
-    template <typename T>
-    void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
-    {
-        typedef typename MinMaxTypeTraits<T>::best_type R;
-
-        dim3 block, grid;
-        getLaunchCfg(src.cols, src.rows, block, grid);
-
-        const int twidth = divUp(divUp(src.cols, grid.x), block.x);
-        const int theight = divUp(divUp(src.rows, grid.y), block.y);
-
-        R* minval_buf = (R*) buf.ptr(0);
-        R* maxval_buf = (R*) buf.ptr(1);
-
-        setDefault(minval_buf, maxval_buf);
-
-        if (mask.data)
-            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, twidth, theight);
-        else
-            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, twidth, theight);
-
-        cudaSafeCall( cudaGetLastError() );
-
-        cudaSafeCall( cudaDeviceSynchronize() );
-
-        R minval_, maxval_;
-        cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
-        cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
-        *minval = minval_;
-        *maxval = maxval_;
-    }
-
-    template void run<uchar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<schar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<ushort>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<int   >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-}
-
-/////////////////////////////////////////////////////////////
-// minMaxLoc
-
-namespace minMaxLoc
-{
-    // To avoid shared bank conflicts we convert each value into value of
-    // appropriate type (32 bits minimum)
-    template <typename T> struct MinMaxTypeTraits;
-    template <> struct MinMaxTypeTraits<unsigned char> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<signed char> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<unsigned short> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
-    template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
-
-    template <int BLOCK_SIZE, typename T, class Mask>
-    __global__ void kernel_pass_1(const PtrStepSz<T> src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight)
-    {
-        typedef typename MinMaxTypeTraits<T>::best_type work_type;
-
-        __shared__ work_type sminval[BLOCK_SIZE];
-        __shared__ work_type smaxval[BLOCK_SIZE];
-        __shared__ unsigned int sminloc[BLOCK_SIZE];
-        __shared__ unsigned int smaxloc[BLOCK_SIZE];
-
-        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
-        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
-
-        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
-        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
-
-        work_type mymin = numeric_limits<work_type>::max();
-        work_type mymax = -numeric_limits<work_type>::max();
-        unsigned int myminloc = 0;
-        unsigned int mymaxloc = 0;
-
-        for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
-        {
-            const T* ptr = src.ptr(y);
-
-            for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
-            {
-                if (mask(y, x))
-                {
-                    const work_type srcVal = ptr[x];
-
-                    if (srcVal < mymin)
-                    {
-                        mymin = srcVal;
-                        myminloc = y * src.cols + x;
-                    }
-
-                    if (srcVal > mymax)
-                    {
-                        mymax = srcVal;
-                        mymaxloc = y * src.cols + x;
-                    }
-                }
-            }
-        }
-
-        reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
-                                 smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
-                                 tid,
-                                 thrust::make_tuple(less<work_type>(), greater<work_type>()));
-
-        if (tid == 0)
-        {
-            minval[bid] = (T) mymin;
-            maxval[bid] = (T) mymax;
-            minloc[bid] = myminloc;
-            maxloc[bid] = mymaxloc;
-        }
-    }
-    template <int BLOCK_SIZE, typename T>
-    __global__ void kernel_pass_2(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int count)
-    {
-        typedef typename MinMaxTypeTraits<T>::best_type work_type;
-
-        __shared__ work_type sminval[BLOCK_SIZE];
-        __shared__ work_type smaxval[BLOCK_SIZE];
-        __shared__ unsigned int sminloc[BLOCK_SIZE];
-        __shared__ unsigned int smaxloc[BLOCK_SIZE];
-
-        unsigned int idx = ::min(threadIdx.x, count - 1);
-
-        work_type mymin = minval[idx];
-        work_type mymax = maxval[idx];
-        unsigned int myminloc = minloc[idx];
-        unsigned int mymaxloc = maxloc[idx];
-
-        reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
-                                 smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
-                                 threadIdx.x,
-                                 thrust::make_tuple(less<work_type>(), greater<work_type>()));
-
-        if (threadIdx.x == 0)
-        {
-            minval[0] = (T) mymin;
-            maxval[0] = (T) mymax;
-            minloc[0] = myminloc;
-            maxloc[0] = mymaxloc;
-        }
-    }
-
-    const int threads_x = 32;
-    const int threads_y = 8;
-
-    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
-    {
-        block = dim3(threads_x, threads_y);
-
-        grid = dim3(divUp(cols, block.x * block.y),
-                    divUp(rows, block.y * block.x));
-
-        grid.x = ::min(grid.x, block.x);
-        grid.y = ::min(grid.y, block.y);
-    }
-
-    void getBufSize(int cols, int rows, size_t elem_size, int& b1cols, int& b1rows, int& b2cols, int& b2rows)
-    {
-        dim3 block, grid;
-        getLaunchCfg(cols, rows, block, grid);
-
-        // For values
-        b1cols = (int)(grid.x * grid.y * elem_size);
-        b1rows = 2;
-
-        // For locations
-        b2cols = grid.x * grid.y * sizeof(int);
-        b2rows = 2;
-    }
-
-    template <typename T>
-    void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf)
-    {
-        dim3 block, grid;
-        getLaunchCfg(src.cols, src.rows, block, grid);
-
-        const int twidth = divUp(divUp(src.cols, grid.x), block.x);
-        const int theight = divUp(divUp(src.rows, grid.y), block.y);
-
-        T* minval_buf = (T*) valbuf.ptr(0);
-        T* maxval_buf = (T*) valbuf.ptr(1);
-        unsigned int* minloc_buf = locbuf.ptr(0);
-        unsigned int* maxloc_buf = locbuf.ptr(1);
-
-        if (mask.data)
-            kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
-        else
-            kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
-
-        cudaSafeCall( cudaGetLastError() );
-
-        kernel_pass_2<threads_x * threads_y><<<1, threads_x * threads_y>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
-        cudaSafeCall( cudaGetLastError() );
-
-        cudaSafeCall( cudaDeviceSynchronize() );
-
-        T minval_, maxval_;
-        cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
-        cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
-        *minval = minval_;
-        *maxval = maxval_;
-
-        unsigned int minloc_, maxloc_;
-        cudaSafeCall( cudaMemcpy(&minloc_, minloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
-        cudaSafeCall( cudaMemcpy(&maxloc_, maxloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
-        minloc[1] = minloc_ / src.cols; minloc[0] = minloc_ - minloc[1] * src.cols;
-        maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
-    }
-
-    template void run<unsigned char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<signed char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<unsigned short>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<int   >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-}
-
-/////////////////////////////////////////////////////////////
-// countNonZero
-
-namespace countNonZero
-{
-    __device__ unsigned int blocks_finished = 0;
-
-    template <int BLOCK_SIZE, typename T>
-    __global__ void kernel(const PtrStepSz<T> src, unsigned int* count, const int twidth, const int theight)
-    {
-        __shared__ unsigned int scount[BLOCK_SIZE];
-
-        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
-        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
-
-        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
-
-        unsigned int mycount = 0;
-
-        for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
-        {
-            const T* ptr = src.ptr(y);
-
-            for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
-            {
-                const T srcVal = ptr[x];
-
-                mycount += (srcVal != 0);
-            }
-        }
-
-        cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
-
-    #if __CUDA_ARCH__ >= 200
-        if (tid == 0)
-            ::atomicAdd(count, mycount);
-    #else
-        __shared__ bool is_last;
-        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
-
-        if (tid == 0)
-        {
-            count[bid] = mycount;
-
-            __threadfence();
-
-            unsigned int ticket = ::atomicInc(&blocks_finished, gridDim.x * gridDim.y);
-            is_last = (ticket == gridDim.x * gridDim.y - 1);
-        }
-
-        __syncthreads();
-
-        if (is_last)
-        {
-            mycount = tid < gridDim.x * gridDim.y ? count[tid] : 0;
-
-            cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
-
-            if (tid == 0)
-            {
-                count[0] = mycount;
-
-                blocks_finished = 0;
-            }
-        }
-    #endif
-    }
-
-    const int threads_x = 32;
-    const int threads_y = 8;
-
-    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
-    {
-        block = dim3(threads_x, threads_y);
-
-        grid = dim3(divUp(cols, block.x * block.y),
-                    divUp(rows, block.y * block.x));
-
-        grid.x = ::min(grid.x, block.x);
-        grid.y = ::min(grid.y, block.y);
-    }
-
-    void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
-    {
-        dim3 block, grid;
-        getLaunchCfg(cols, rows, block, grid);
-
-        bufcols = grid.x * grid.y * sizeof(int);
-        bufrows = 1;
-    }
-
-    template <typename T>
-    int run(const PtrStepSzb src, PtrStep<unsigned int> buf)
-    {
-        dim3 block, grid;
-        getLaunchCfg(src.cols, src.rows, block, grid);
-
-        const int twidth = divUp(divUp(src.cols, grid.x), block.x);
-        const int theight = divUp(divUp(src.rows, grid.y), block.y);
-
-        unsigned int* count_buf = buf.ptr(0);
-
-        cudaSafeCall( cudaMemset(count_buf, 0, sizeof(unsigned int)) );
-
-        kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, count_buf, twidth, theight);
-        cudaSafeCall( cudaGetLastError() );
-
-        cudaSafeCall( cudaDeviceSynchronize() );
-
-        unsigned int count;
-        cudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost));
-
-        return count;
-    }
-
-    template int run<uchar >(const PtrStepSzb src, PtrStep<unsigned int> buf);
-    template int run<schar >(const PtrStepSzb src, PtrStep<unsigned int> buf);
-    template int run<ushort>(const PtrStepSzb src, PtrStep<unsigned int> buf);
-    template int run<short >(const PtrStepSzb src, PtrStep<unsigned int> buf);
-    template int run<int   >(const PtrStepSzb src, PtrStep<unsigned int> buf);
-    template int run<float >(const PtrStepSzb src, PtrStep<unsigned int> buf);
-    template int run<double>(const PtrStepSzb src, PtrStep<unsigned int> buf);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// reduce
-
-namespace reduce
-{
-    struct Sum
-    {
-        template <typename T>
-        __device__ __forceinline__ T startValue() const
-        {
-            return VecTraits<T>::all(0);
-        }
-
-        template <typename T>
-        __device__ __forceinline__ T operator ()(T a, T b) const
-        {
-            return a + b;
-        }
-
-        template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
-        {
-            return r;
-        }
-
-        __device__ __forceinline__ Sum() {}
-        __device__ __forceinline__ Sum(const Sum&) {}
-    };
-
-    struct Avg
-    {
-        template <typename T>
-        __device__ __forceinline__ T startValue() const
-        {
-            return VecTraits<T>::all(0);
-        }
-
-        template <typename T>
-        __device__ __forceinline__ T operator ()(T a, T b) const
-        {
-            return a + b;
-        }
-
-        template <typename T>
-        __device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const
-        {
-            return r / sz;
-        }
-
-        __device__ __forceinline__ Avg() {}
-        __device__ __forceinline__ Avg(const Avg&) {}
-    };
-
-    struct Min
-    {
-        template <typename T>
-        __device__ __forceinline__ T startValue() const
-        {
-            return VecTraits<T>::all(numeric_limits<typename VecTraits<T>::elem_type>::max());
-        }
-
-        template <typename T>
-        __device__ __forceinline__ T operator ()(T a, T b) const
-        {
-            minimum<T> minOp;
-            return minOp(a, b);
-        }
-
-        template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
-        {
-            return r;
-        }
-
-        __device__ __forceinline__ Min() {}
-        __device__ __forceinline__ Min(const Min&) {}
-    };
-
-    struct Max
-    {
-        template <typename T>
-        __device__ __forceinline__ T startValue() const
-        {
-            return VecTraits<T>::all(-numeric_limits<typename VecTraits<T>::elem_type>::max());
-        }
-
-        template <typename T>
-        __device__ __forceinline__ T operator ()(T a, T b) const
-        {
-            maximum<T> maxOp;
-            return maxOp(a, b);
-        }
-
-        template <typename T>
-        __device__ __forceinline__ T result(T r, double) const
-        {
-            return r;
-        }
-
-        __device__ __forceinline__ Max() {}
-        __device__ __forceinline__ Max(const Max&) {}
-    };
-
-    ///////////////////////////////////////////////////////////
-
-    template <typename T, typename S, typename D, class Op>
-    __global__ void rowsKernel(const PtrStepSz<T> src, D* dst, const Op op)
-    {
-        __shared__ S smem[16 * 16];
-
-        const int x = blockIdx.x * 16 + threadIdx.x;
-
-        S myVal = op.template startValue<S>();
-
-        if (x < src.cols)
-        {
-            for (int y = threadIdx.y; y < src.rows; y += 16)
-            {
-                S srcVal = src(y, x);
-                myVal = op(myVal, srcVal);
-            }
-        }
-
-        smem[threadIdx.x * 16 + threadIdx.y] = myVal;
-
-        __syncthreads();
-
-        volatile S* srow = smem + threadIdx.y * 16;
-
-        myVal = srow[threadIdx.x];
-        cudev::reduce<16>(srow, myVal, threadIdx.x, op);
-
-        if (threadIdx.x == 0)
-            srow[0] = myVal;
-
-        __syncthreads();
-
-        if (threadIdx.y == 0 && x < src.cols)
-            dst[x] = (D) op.result(smem[threadIdx.x * 16], src.rows);
-    }
-
-    template <typename T, typename S, typename D, class Op>
-    void rowsCaller(PtrStepSz<T> src, D* dst, cudaStream_t stream)
-    {
-        const dim3 block(16, 16);
-        const dim3 grid(divUp(src.cols, block.x));
-
-        Op op;
-        rowsKernel<T, S, D, Op><<<grid, block, 0, stream>>>(src, dst, op);
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-
-    template <typename T, typename S, typename D>
-    void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSz<T> src, D* dst, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            rowsCaller<T, S, D, Sum>,
-            rowsCaller<T, S, D, Avg>,
-            rowsCaller<T, S, D, Max>,
-            rowsCaller<T, S, D, Min>
-        };
-
-        funcs[op]((PtrStepSz<T>) src, (D*) dst, stream);
-    }
-
-    template void rows<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<unsigned char, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<unsigned char, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<unsigned char, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-
-    template void rows<unsigned short, int, unsigned short>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<unsigned short, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<unsigned short, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<unsigned short, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-
-    template void rows<short, int, short>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<short, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<short, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<short, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-
-    template void rows<int, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<int, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<int, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-
-    template void rows<float, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-    template void rows<float, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-
-    template void rows<double, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
-
-    ///////////////////////////////////////////////////////////
-
-    template <int BLOCK_SIZE, typename T, typename S, typename D, int cn, class Op>
-    __global__ void colsKernel(const PtrStepSz<typename TypeVec<T, cn>::vec_type> src, typename TypeVec<D, cn>::vec_type* dst, const Op op)
-    {
-        typedef typename TypeVec<T, cn>::vec_type src_type;
-        typedef typename TypeVec<S, cn>::vec_type work_type;
-        typedef typename TypeVec<D, cn>::vec_type dst_type;
-
-        __shared__ S smem[BLOCK_SIZE * cn];
-
-        const int y = blockIdx.x;
-
-        const src_type* srcRow = src.ptr(y);
-
-        work_type myVal = op.template startValue<work_type>();
-
-        for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE)
-            myVal = op(myVal, saturate_cast<work_type>(srcRow[x]));
-
-        cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));
-
-        if (threadIdx.x == 0)
-            dst[y] = saturate_cast<dst_type>(op.result(myVal, src.cols));
-    }
-
-    template <typename T, typename S, typename D, int cn, class Op> void colsCaller(PtrStepSzb src, void* dst, cudaStream_t stream)
-    {
-        const int BLOCK_SIZE = 256;
-
-        const dim3 block(BLOCK_SIZE);
-        const dim3 grid(src.rows);
-
-        Op op;
-        colsKernel<BLOCK_SIZE, T, S, D, cn, Op><<<grid, block, 0, stream>>>((PtrStepSz<typename TypeVec<T, cn>::vec_type>) src, (typename TypeVec<D, cn>::vec_type*) dst, op);
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-
-    }
-
-    template <typename T, typename S, typename D> void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream)
-    {
-        typedef void (*func_t)(PtrStepSzb src, void* dst, cudaStream_t stream);
-        static const func_t funcs[5][4] =
-        {
-            {0,0,0,0},
-            {colsCaller<T, S, D, 1, Sum>, colsCaller<T, S, D, 1, Avg>, colsCaller<T, S, D, 1, Max>, colsCaller<T, S, D, 1, Min>},
-            {colsCaller<T, S, D, 2, Sum>, colsCaller<T, S, D, 2, Avg>, colsCaller<T, S, D, 2, Max>, colsCaller<T, S, D, 2, Min>},
-            {colsCaller<T, S, D, 3, Sum>, colsCaller<T, S, D, 3, Avg>, colsCaller<T, S, D, 3, Max>, colsCaller<T, S, D, 3, Min>},
-            {colsCaller<T, S, D, 4, Sum>, colsCaller<T, S, D, 4, Avg>, colsCaller<T, S, D, 4, Max>, colsCaller<T, S, D, 4, Min>},
-        };
-
-        funcs[cn][op](src, dst, stream);
-    }
-
-    template void cols<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<unsigned char, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<unsigned char, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<unsigned char, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-
-    template void cols<unsigned short, int, unsigned short>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<unsigned short, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<unsigned short, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<unsigned short, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-
-    template void cols<short, int, short>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<short, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<short, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<short, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-
-    template void cols<int, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<int, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<int, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-
-    template void cols<float, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-    template void cols<float, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-
-    template void cols<double, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
-}
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/cuda/safe_call.hpp b/modules/gpu/src/cuda/safe_call.hpp
index 24a865518..10b72e623 100644
--- a/modules/gpu/src/cuda/safe_call.hpp
+++ b/modules/gpu/src/cuda/safe_call.hpp
@@ -45,24 +45,20 @@
 
 #include <cuda_runtime_api.h>
 #include <cufft.h>
-#include <cublas.h>
 #include "NCV.hpp"
 
 #if defined(__GNUC__)
     #define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
     #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
-    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
 #else /* defined(__CUDACC__) || defined(__MSVC__) */
     #define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__)
     #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__)
-    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__)
 #endif
 
 namespace cv { namespace gpu
 {
     void ncvError(int err, const char *file, const int line, const char *func = "");
     void cufftError(int err, const char *file, const int line, const char *func = "");
-    void cublasError(int err, const char *file, const int line, const char *func = "");
 }}
 
 static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
@@ -77,10 +73,4 @@ static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const i
         cv::gpu::cufftError(err, file, line, func);
 }
 
-static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "")
-{
-    if (CUBLAS_STATUS_SUCCESS != err)
-        cv::gpu::cublasError(err, file, line, func);
-}
-
 #endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
diff --git a/modules/gpu/src/error.cpp b/modules/gpu/src/error.cpp
index b7f4e3e0b..36759864b 100644
--- a/modules/gpu/src/error.cpp
+++ b/modules/gpu/src/error.cpp
@@ -142,23 +142,6 @@ namespace
     };
 
     const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]);
-
-    //////////////////////////////////////////////////////////////////////////
-    // CUBLAS errors
-
-    const ErrorEntry cublas_errors[] =
-    {
-        error_entry( CUBLAS_STATUS_SUCCESS ),
-        error_entry( CUBLAS_STATUS_NOT_INITIALIZED ),
-        error_entry( CUBLAS_STATUS_ALLOC_FAILED ),
-        error_entry( CUBLAS_STATUS_INVALID_VALUE ),
-        error_entry( CUBLAS_STATUS_ARCH_MISMATCH ),
-        error_entry( CUBLAS_STATUS_MAPPING_ERROR ),
-        error_entry( CUBLAS_STATUS_EXECUTION_FAILED ),
-        error_entry( CUBLAS_STATUS_INTERNAL_ERROR )
-    };
-
-    const int cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]);
 }
 
 namespace cv
@@ -176,12 +159,6 @@ namespace cv
             String msg = getErrorString(code, cufft_errors, cufft_error_num);
             cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
         }
-
-        void cublasError(int code, const char* file, const int line, const char* func)
-        {
-            String msg = getErrorString(code, cublas_errors, cublas_error_num);
-            cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
-        }
     }
 }
 
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 885445c1a..96a62b86b 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -92,6 +92,7 @@ void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, do
 void cv::gpu::CannyBuf::create(const Size&, int) { throw_no_cuda(); }
 void cv::gpu::CannyBuf::release() { throw_no_cuda(); }
 cv::Ptr<cv::gpu::CLAHE> cv::gpu::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::gpu::CLAHE>(); }
+void cv::gpu::alphaComp(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -1672,4 +1673,77 @@ cv::Ptr<cv::gpu::CLAHE> cv::gpu::createCLAHE(double clipLimit, cv::Size tileGrid
     return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
 }
 
+////////////////////////////////////////////////////////////////////////
+// alphaComp
+
+namespace
+{
+    template <int DEPTH> struct NppAlphaCompFunc
+    {
+        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+        // Pointer type of the NPP routines that are passed as the func template argument of NppAlphaComp.
+        typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pSrc2, int nSrc2Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, NppiAlphaOp eAlphaOp);
+    };
+    // Adapts one NPP alpha-composition routine (func) to a GpuMat-based call() with a uniform signature.
+    template <int DEPTH, typename NppAlphaCompFunc<DEPTH>::func_t func> struct NppAlphaComp
+    {
+        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+
+        static void call(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream)
+        {
+            NppStreamHandler h(stream);
+            // Take the whole ROI from img1; the caller is expected to pass equally sized inputs.
+            NppiSize oSizeROI;
+            oSizeROI.width = img1.cols;
+            oSizeROI.height = img1.rows;
+
+            nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
+                              dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );
+            // On the default stream (0) wait for the device before returning.
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+}
+
+void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream)
+{
+    static const NppiAlphaOp npp_alpha_ops[] = { // indexed directly by alpha_op
+        NPPI_OP_ALPHA_OVER,
+        NPPI_OP_ALPHA_IN,
+        NPPI_OP_ALPHA_OUT,
+        NPPI_OP_ALPHA_ATOP,
+        NPPI_OP_ALPHA_XOR,
+        NPPI_OP_ALPHA_PLUS,
+        NPPI_OP_ALPHA_OVER_PREMUL,
+        NPPI_OP_ALPHA_IN_PREMUL,
+        NPPI_OP_ALPHA_OUT_PREMUL,
+        NPPI_OP_ALPHA_ATOP_PREMUL,
+        NPPI_OP_ALPHA_XOR_PREMUL,
+        NPPI_OP_ALPHA_PLUS_PREMUL,
+        NPPI_OP_ALPHA_PREMUL
+    };
+    // Per-depth wrappers, indexed by img1.depth(); null slots are depths the type check below rejects.
+    typedef void (*func_t)(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream);
+
+    static const func_t funcs[] =
+    {
+        NppAlphaComp<CV_8U, nppiAlphaComp_8u_AC4R>::call,
+        0,
+        NppAlphaComp<CV_16U, nppiAlphaComp_16u_AC4R>::call,
+        0,
+        NppAlphaComp<CV_32S, nppiAlphaComp_32s_AC4R>::call,
+        NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call
+    };
+
+    CV_Assert( img1.type() == CV_8UC4 || img1.type() == CV_16UC4 || img1.type() == CV_32SC4 || img1.type() == CV_32FC4 );
+    CV_Assert( img1.size() == img2.size() && img1.type() == img2.type() );
+    CV_Assert( alpha_op >= 0 && alpha_op < static_cast<int>(sizeof(npp_alpha_ops) / sizeof(npp_alpha_ops[0])) ); // reject values that would index past npp_alpha_ops
+    dst.create(img1.size(), img1.type());
+
+    const func_t func = funcs[img1.depth()];
+
+    func(img1, img2, dst, npp_alpha_ops[alpha_op], StreamAccessor::getStream(stream));
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index aacc43f99..69ddeaed9 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -76,10 +76,6 @@
         #include <cufft.h>
     #endif
 
-    #ifdef HAVE_CUBLAS
-        #include <cublas.h>
-    #endif
-
     #include "internal_shared.hpp"
     #include "opencv2/core/stream_accessor.hpp"
 
diff --git a/modules/gpuarithm/CMakeLists.txt b/modules/gpuarithm/CMakeLists.txt
new file mode 100644
index 000000000..99a6fcce1
--- /dev/null
+++ b/modules/gpuarithm/CMakeLists.txt
@@ -0,0 +1,13 @@
+if(IOS OR ANDROID)
+  ocv_module_disable(gpuarithm)
+endif()
+
+set(the_description "GPU-accelerated Operations on Matrices")
+# Ask the OpenCV helper to disable the -Wundef and -Wmissing-declarations warnings.
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+
+ocv_define_module(gpuarithm opencv_core)
+# Attach cuBLAS to the module target only when HAVE_CUBLAS is set.
+if(HAVE_CUBLAS)
+  cuda_add_cublas_to_target(${the_module})
+endif()
diff --git a/modules/gpuarithm/doc/gpuarithm.rst b/modules/gpuarithm/doc/gpuarithm.rst
new file mode 100644
index 000000000..a8b0f1445
--- /dev/null
+++ b/modules/gpuarithm/doc/gpuarithm.rst
@@ -0,0 +1,10 @@
+*************************************************
+gpuarithm. GPU-accelerated Operations on Matrices
+*************************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    operations_on_matrices
+    per_element_operations
+    matrix_reductions
diff --git a/modules/gpu/doc/matrix_reductions.rst b/modules/gpuarithm/doc/matrix_reductions.rst
similarity index 100%
rename from modules/gpu/doc/matrix_reductions.rst
rename to modules/gpuarithm/doc/matrix_reductions.rst
diff --git a/modules/gpu/doc/operations_on_matrices.rst b/modules/gpuarithm/doc/operations_on_matrices.rst
similarity index 100%
rename from modules/gpu/doc/operations_on_matrices.rst
rename to modules/gpuarithm/doc/operations_on_matrices.rst
diff --git a/modules/gpu/doc/per_element_operations.rst b/modules/gpuarithm/doc/per_element_operations.rst
similarity index 95%
rename from modules/gpu/doc/per_element_operations.rst
rename to modules/gpuarithm/doc/per_element_operations.rst
index 2670ba323..bf393a24f 100644
--- a/modules/gpu/doc/per_element_operations.rst
+++ b/modules/gpuarithm/doc/per_element_operations.rst
@@ -443,3 +443,25 @@ Computes the per-element maximum of two matrices (or a matrix and a scalar).
     :param stream: Stream for the asynchronous version.
 
 .. seealso:: :ocv:func:`max`
+
+
+
+gpu::threshold
+------------------
+Applies a fixed-level threshold to each array element.
+
+.. ocv:function:: double gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null())
+
+    :param src: Source array (single-channel).
+
+    :param dst: Destination array with the same size and type as  ``src`` .
+
+    :param thresh: Threshold value.
+
+    :param maxval: Maximum value to use with  ``THRESH_BINARY`` and  ``THRESH_BINARY_INV`` threshold types.
+
+    :param type: Threshold type. For details, see  :ocv:func:`threshold` . The ``THRESH_OTSU`` threshold type is not supported.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`threshold`
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
new file mode 100644
index 000000000..57d9abfbf
--- /dev/null
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -0,0 +1,279 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUARITHM_HPP__
+#define __OPENCV_GPUARITHM_HPP__
+
+#include "opencv2/core/gpumat.hpp"
+
+namespace cv { namespace gpu {
+
+//! adds one matrix to another (c = a + b)
+CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
+//! adds scalar to a matrix (c = a + s)
+CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
+
+//! subtracts one matrix from another (c = a - b)
+CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
+//! subtracts scalar from a matrix (c = a - s)
+CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
+
+//! computes element-wise weighted product of the two arrays (c = scale * a * b)
+CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
+//! computes weighted product of a matrix and a scalar (c = scale * a * s)
+CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
+
+//! computes element-wise weighted quotient of the two arrays (c = scale * a / b)
+CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
+//! computes element-wise weighted quotient of matrix and scalar (c = scale * a / s)
+CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
+//! computes element-wise weighted reciprocal of an array (c = scale / b)
+CV_EXPORTS void divide(double scale, const GpuMat& b, GpuMat& c, int dtype = -1, Stream& stream = Stream::Null());
+
+//! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma)
+CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst,
+                            int dtype = -1, Stream& stream = Stream::Null());
+
+//! adds scaled array to another one (dst = alpha*src1 + src2) by forwarding to addWeighted with beta = 1, gamma = 0 and dtype = -1
+static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null())
+{
+    addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream);
+}
+
+//! computes element-wise absolute difference of two arrays (c = abs(a - b))
+CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
+//! computes element-wise absolute difference of array and scalar (c = abs(a - s))
+CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null());
+
+//! computes absolute value of each matrix element
+//! supports CV_16S and CV_32F depth
+CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes square of each pixel in an image
+//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
+CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes square root of each pixel in an image
+//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
+CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes exponent of each matrix element (b = e**a)
+//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
+CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
+
+//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
+//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
+CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
+
+//! computes power of each matrix element:
+//!    dst(i,j) = pow(     src(i,j) , power), if src.type() is integer
+//!    dst(i,j) = pow(fabs(src(i,j)), power), otherwise
+//! supports all depths except CV_64F
+CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! compares elements of two arrays (c = a <cmpop> b)
+CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null());
+CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null());
+
+//! performs per-elements bit-wise inversion
+CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
+
+//! calculates per-element bit-wise disjunction of two arrays
+CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
+//! calculates per-element bit-wise disjunction of array and scalar
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
+CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! calculates per-element bit-wise conjunction of two arrays
+CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
+//! calculates per-element bit-wise conjunction of array and scalar
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
+CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! calculates per-element bit-wise "exclusive or" operation
+CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
+//! calculates per-element bit-wise "exclusive or" of array and scalar
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
+CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! pixel by pixel right shift of an image by a constant value
+//! supports 1, 3 and 4 channels images with integer elements
+CV_EXPORTS void rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! pixel by pixel left shift of an image by a constant value
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
+CV_EXPORTS void lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes per-element minimum of two arrays (dst = min(src1, src2))
+CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes per-element minimum of array and scalar (dst = min(src1, src2))
+CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes per-element maximum of two arrays (dst = max(src1, src2))
+CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes per-element maximum of array and scalar (dst = max(src1, src2))
+CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! implements generalized matrix product algorithm GEMM from BLAS
+CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
+    const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null());
+
+//! transposes the matrix
+//! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc)
+CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! reverses the order of the rows, columns or both in a matrix
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth
+CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null());
+
+//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
+//! destination array will have the same depth type as lut and the same number of channels as source
+//! supports CV_8UC1, CV_8UC3 types
+CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! makes multi-channel array out of several single-channel arrays
+CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! makes multi-channel array out of several single-channel arrays
+CV_EXPORTS void merge(const std::vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! copies each plane of a multi-channel array to a dedicated array
+CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null());
+
+//! copies each plane of a multi-channel array to a dedicated array
+CV_EXPORTS void split(const GpuMat& src, std::vector<GpuMat>& dst, Stream& stream = Stream::Null());
+
+//! computes magnitude of complex (x(i).re, x(i).im) vector
+//! supports only CV_32FC2 type
+CV_EXPORTS void magnitude(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null());
+
+//! computes squared magnitude of complex (x(i).re, x(i).im) vector
+//! supports only CV_32FC2 type
+CV_EXPORTS void magnitudeSqr(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null());
+
+//! computes magnitude of each (x(i), y(i)) vector
+//! supports only floating-point source
+CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null());
+
+//! computes squared magnitude of each (x(i), y(i)) vector
+//! supports only floating-point source
+CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null());
+
+//! computes angle (angle(i)) of each (x(i), y(i)) vector
+//! supports only floating-point source
+CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
+
+//! converts Cartesian coordinates to polar
+//! supports only floating-point source
+CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
+
+//! converts polar coordinates to Cartesian
+//! supports only floating-point source
+CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null());
+
+//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
+CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0,
+                          int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat());
+CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b,
+                          int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf);
+
+//! computes mean value and standard deviation of all or selected array elements
+//! supports only CV_8UC1 type
+CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
+//! buffered version
+CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
+
+//! computes norm of array
+//! supports NORM_INF, NORM_L1, NORM_L2
+//! supports all matrices except 64F
+CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2);
+CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf);
+CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf);
+
+//! computes norm of the difference between two arrays
+//! supports NORM_INF, NORM_L1, NORM_L2
+//! supports only CV_8UC1 type
+CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2);
+
+//! computes sum of array elements
+//! supports only single channel images
+CV_EXPORTS Scalar sum(const GpuMat& src);
+CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf);
+CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+
+//! computes sum of array elements absolute values
+//! supports only single channel images
+CV_EXPORTS Scalar absSum(const GpuMat& src);
+CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf);
+CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+
+//! computes squared sum of array elements
+//! supports only single channel images
+CV_EXPORTS Scalar sqrSum(const GpuMat& src);
+CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf);
+CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+
+//! finds global minimum and maximum array elements and returns their values
+CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat());
+CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf);
+
+//! finds global minimum and maximum array elements and returns their values with locations
+CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
+                          const GpuMat& mask=GpuMat());
+CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
+                          const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf);
+
+//! counts non-zero array elements
+CV_EXPORTS int countNonZero(const GpuMat& src);
+CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf);
+
+//! reduces a matrix to a vector
+CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
+
+//! applies fixed threshold to the image
+CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUARITHM_HPP__ */
diff --git a/modules/gpu/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp
similarity index 100%
rename from modules/gpu/perf/perf_core.cpp
rename to modules/gpuarithm/perf/perf_core.cpp
diff --git a/modules/gpuarithm/perf/perf_main.cpp b/modules/gpuarithm/perf/perf_main.cpp
new file mode 100644
index 000000000..b35791cda
--- /dev/null
+++ b/modules/gpuarithm/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
+CV_PERF_TEST_MAIN(gpuarithm, printCudaInfo())
diff --git a/modules/gpuarithm/perf/perf_precomp.cpp b/modules/gpuarithm/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpuarithm/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpuarithm/perf/perf_precomp.hpp b/modules/gpuarithm/perf/perf_precomp.hpp
new file mode 100644
index 000000000..06bc20b9b
--- /dev/null
+++ b/modules/gpuarithm/perf/perf_precomp.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/core.hpp"
+#include "opencv2/gpuarithm.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpu/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
similarity index 90%
rename from modules/gpu/src/arithm.cpp
rename to modules/gpuarithm/src/arithm.cpp
index c40e7131e..d452e3ae7 100644
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -66,6 +66,61 @@ void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const
 ////////////////////////////////////////////////////////////////////////
 // gemm
 
+#ifdef HAVE_CUBLAS
+// Translation of cuBLAS status codes into OpenCV errors; compiled only when HAVE_CUBLAS is defined.
+namespace
+{
+    #define error_entry(entry)  { entry, #entry }
+    // error_entry(X) above expands to { X, "X" }: a status value paired with its own name as a string.
+    struct ErrorEntry
+    {
+        int code; // numeric status value
+        const char* str; // name of the status constant (produced by #entry)
+    };
+    // Unary predicate for std::find_if: matches the table entry whose code equals the stored one.
+    struct ErrorEntryComparer
+    {
+        int code;
+        ErrorEntryComparer(int code_) : code(code_) {}
+        bool operator()(const ErrorEntry& e) const { return e.code == code; }
+    };
+    // Table of the cuBLAS statuses that can be reported by name.
+    const ErrorEntry cublas_errors[] =
+    {
+        error_entry( CUBLAS_STATUS_SUCCESS ),
+        error_entry( CUBLAS_STATUS_NOT_INITIALIZED ),
+        error_entry( CUBLAS_STATUS_ALLOC_FAILED ),
+        error_entry( CUBLAS_STATUS_INVALID_VALUE ),
+        error_entry( CUBLAS_STATUS_ARCH_MISMATCH ),
+        error_entry( CUBLAS_STATUS_MAPPING_ERROR ),
+        error_entry( CUBLAS_STATUS_EXECUTION_FAILED ),
+        error_entry( CUBLAS_STATUS_INTERNAL_ERROR )
+    };
+    // Number of entries in cublas_errors; also used as the "not found" index in ___cublasSafeCall.
+    const size_t cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]);
+    // Does nothing for CUBLAS_STATUS_SUCCESS; otherwise calls cv::error with GpuApiCallError, a "<name> [Code = <n>]" message and the given func/file/line.
+    static inline void ___cublasSafeCall(cublasStatus_t err, const char* file, const int line, const char* func)
+    {
+        if (CUBLAS_STATUS_SUCCESS != err)
+        {
+            size_t idx = std::find_if(cublas_errors, cublas_errors + cublas_error_num, ErrorEntryComparer(err)) - cublas_errors; // == cublas_error_num when err is not in the table
+
+            const char* msg = (idx != cublas_error_num) ? cublas_errors[idx].str : "Unknown error code";
+            String str = cv::format("%s [Code = %d]", msg, err);
+
+            cv::error(cv::Error::GpuApiCallError, str, func, file, line);
+        }
+    }
+}
+// cublasSafeCall(expr): passes the status returned by expr to ___cublasSafeCall together with the call site (__FILE__, __LINE__).
+#if defined(__GNUC__)
+    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
+#else /* compilers that do not define __GNUC__ (e.g. MSVC): __func__ is not used, an empty function name is passed */
+    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, "")
+#endif
+
+#endif // HAVE_CUBLAS
+
 void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags, Stream& stream)
 {
 #ifndef HAVE_CUBLAS
@@ -200,9 +255,14 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
 ////////////////////////////////////////////////////////////////////////
 // transpose
 
+namespace arithm
+{
+    template <typename T> void transpose(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream); // declaration only; used below as transpose<int> / transpose<double>. Definition presumably lives in a CUDA source of this module — TODO confirm
+}
+
 void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
 {
-    CV_Assert(src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8);
+    CV_Assert( src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8 );
 
     dst.create( src.cols, src.rows, src.type() );
 
@@ -218,35 +278,21 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
 
         nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
             dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     else if (src.elemSize() == 4)
     {
-        NppStStreamHandler h(stream);
-
-        NcvSize32u sz;
-        sz.width  = src.cols;
-        sz.height = src.rows;
-
-        ncvSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step),
-            dst.ptr<Ncv32u>(), static_cast<int>(dst.step), sz) );
+        arithm::transpose<int>(src, dst, stream);
     }
     else // if (src.elemSize() == 8)
     {
         if (!deviceSupports(NATIVE_DOUBLE))
             CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
 
-        NppStStreamHandler h(stream);
-
-        NcvSize32u sz;
-        sz.width  = src.cols;
-        sz.height = src.rows;
-
-        ncvSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step),
-            dst.ptr<Ncv64u>(), static_cast<int>(dst.step), sz) );
+        arithm::transpose<double>(src, dst, stream);
     }
-
-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 ////////////////////////////////////////////////////////////////////////
diff --git a/modules/gpuarithm/src/cuda/absdiff_mat.cu b/modules/gpuarithm/src/cuda/absdiff_mat.cu
new file mode 100644
index 000000000..d47cbdc17
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/absdiff_mat.cu
@@ -0,0 +1,147 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm // device functors used by the matrix-matrix absdiff launchers further down in this file
+{
+    struct VAbsDiff4 : binary_function<uint, uint, uint> // binary functor over 32-bit words, forwards to vabsdiff4
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const
+        {
+            return vabsdiff4(a, b); // presumably the absolute difference of four packed 8-bit lanes (simd_functions.hpp) — confirm
+        }
+        // The functor has no data members; both constructors are device-only and have empty bodies.
+        __device__ __forceinline__ VAbsDiff4() {}
+        __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
+    };
+    // Same shape as VAbsDiff4, but forwards to vabsdiff2 (presumably two packed 16-bit lanes — confirm).
+    struct VAbsDiff2 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const
+        {
+            return vabsdiff2(a, b);
+        }
+
+        __device__ __forceinline__ VAbsDiff2() {}
+        __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
+    };
+    // _abs overloads: select ::abs, ::fabsf or ::fabs according to the argument type (int / float / double).
+    __device__ __forceinline__ int _abs(int a)
+    {
+        return ::abs(a);
+    }
+    __device__ __forceinline__ float _abs(float a)
+    {
+        return ::fabsf(a);
+    }
+    __device__ __forceinline__ double _abs(double a)
+    {
+        return ::fabs(a);
+    }
+    // Generic functor: returns _abs(a - b) converted to T with saturate_cast.
+    template <typename T> struct AbsDiffMat : binary_function<T, T, T>
+    {
+        __device__ __forceinline__ T operator ()(T a, T b) const
+        {
+            return saturate_cast<T>(_abs(a - b)); // for T narrower than int, a - b is promoted to int, so _abs(int) is selected
+        }
+
+        __device__ __forceinline__ AbsDiffMat() {}
+        __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev // TransformFunctorTraits specializations for the three absdiff functors
+{
+    template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
+    {
+    };
+    // NOTE(review): ArithmFuncTraits is declared in arithm_func_traits.hpp (not visible here); its two arguments look like source/destination element sizes — confirm
+    template <> struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
+    {
+    };
+    // Partial specialization for AbsDiffMat<T>: both ArithmFuncTraits arguments are sizeof(T).
+    template <typename T> struct TransformFunctorTraits< arithm::AbsDiffMat<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+}}}
+
+namespace arithm // host functions that run the absdiff functors through cudev::transform
+{
+    void absDiffMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream); // applies VAbsDiff4 to uint elements of src1/src2 into dst, no mask, on the given stream
+    }
+    // Same call as absDiffMat_v4, but with the VAbsDiff2 functor.
+    void absDiffMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
+    }
+    // Typed path: casts the byte-typed PtrStepSzb arguments to PtrStepSz<T> and applies AbsDiffMat<T> without a mask.
+    template <typename T>
+    void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream);
+    }
+    // Explicit instantiations of absDiffMat for the element types listed below.
+    template void absDiffMat<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffMat<schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffMat<short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffMat<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffMat<float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/absdiff_scalar.cu b/modules/gpuarithm/src/cuda/absdiff_scalar.cu
new file mode 100644
index 000000000..e705609b7
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/absdiff_scalar.cu
@@ -0,0 +1,98 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm // device functor for the matrix-scalar absdiff
+{
+    template <typename T, typename S> struct AbsDiffScalar : unary_function<T, T> // T: element type, S: type the scalar is stored in
+    {
+        S val; // scalar operand subtracted from every element
+        // The constructor carries no __device__ qualifier; absDiffScalar builds the functor in host code.
+        explicit AbsDiffScalar(S val_) : val(val_) {}
+        // Returns abs_func<S> applied to (a - val), converted to T with saturate_cast.
+        __device__ __forceinline__ T operator ()(T a) const
+        {
+            abs_func<S> f;
+            return saturate_cast<T>(f(a - val));
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev // TransformFunctorTraits specialization for AbsDiffScalar
+{
+    template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> // both arguments use sizeof(T); S does not take part
+    {
+    };
+}}}
+
+namespace arithm // host function that runs AbsDiffScalar through cudev::transform
+{
+    template <typename T, typename S>
+    void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
+    {
+        AbsDiffScalar<T, S> op(static_cast<S>(val)); // the double scalar is converted to the working type S
+        // Casts the byte-typed src1/dst to PtrStepSz<T> and applies op without a mask on the given stream.
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream);
+    }
+    // Explicit instantiations: S is float for every T except double, which uses S = double.
+    template void absDiffScalar<uchar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffScalar<schar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffScalar<ushort, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffScalar<short, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffScalar<int, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffScalar<float, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void absDiffScalar<double, double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/add_mat.cu b/modules/gpuarithm/src/cuda/add_mat.cu
new file mode 100644
index 000000000..511e11d0f
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/add_mat.cu
@@ -0,0 +1,185 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Binary functor that forwards two uint operands to vadd4().
+    // NOTE(review): vadd4 is presumably the packed per-byte add from simd_functions.hpp - confirm there.
+    struct VAdd4 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const { return vadd4(a, b); }
+
+        // The functor is instantiated in host code by addMat_v4(), so the constructors must be callable from both sides.
+        __host__ __device__ __forceinline__ VAdd4() {}
+        __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
+    };
+
+    // Binary functor that forwards two uint operands to vadd2().
+    // NOTE(review): vadd2 is presumably the packed 16-bit counterpart of vadd4 - confirm in simd_functions.hpp.
+    struct VAdd2 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const { return vadd2(a, b); }
+
+        // Instantiated in host code by addMat_v2(), hence __host__ __device__ on the constructors.
+        __host__ __device__ __forceinline__ VAdd2() {}
+        __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
+    };
+
+    // Generic binary functor: adds two T operands and saturates the sum into the destination type D.
+    template <typename T, typename D> struct AddMat : binary_function<T, T, D>
+    {
+        __device__ __forceinline__ D operator ()(T a, T b) const { return saturate_cast<D>(a + b); }
+
+        // Instantiated in host code by addMat(), hence __host__ __device__ on the constructors.
+        // The copy constructor deliberately ignores its argument: the functor carries no state.
+        __host__ __device__ __forceinline__ AddMat() {}
+        __host__ __device__ __forceinline__ AddMat(const AddMat&) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // TransformFunctorTraits specializations for the add_mat functors; each one inherits from ArithmFuncTraits.
+    // The packed functors take and return uint, so both sizes are sizeof(uint).
+    template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+
+    template <> struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+
+    // The generic functor is keyed on the operand type size (T) and the result type size (D).
+    template <typename T, typename D>
+    struct TransformFunctorTraits< arithm::AddMat<T, D> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
+    {};
+}}}
+
+namespace arithm
+{
+    // Unmasked adds on uint-typed buffers: addMat_v4 applies VAdd4, addMat_v2 applies VAdd2.
+    void addMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
+    }
+
+    void addMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
+    }
+    // Generic add: views src1/src2 as PtrStepSz<T> and dst as PtrStepSz<D>; the mask is used only when mask.data is non-null.
+    template <typename T, typename D>
+    void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    {
+        const PtrStepSz<T> lhs(src1), rhs(src2);
+        if (mask.data) cudev::transform(lhs, rhs, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
+        else           cudev::transform(lhs, rhs, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
+    }
+    // Explicit instantiations <T, D>; the commented-out pairs are not built.
+    template void addMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    template void addMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/add_scalar.cu b/modules/gpuarithm/src/cuda/add_scalar.cu
new file mode 100644
index 000000000..3f43f8d7a
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/add_scalar.cu
@@ -0,0 +1,148 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Unary functor: adds the stored scalar (type S) to an input element (type T) and saturates the sum into D.
+    template <typename T, typename S, typename D>
+    struct AddScalar : unary_function<T, D>
+    {
+        S val;
+
+        // No execution-space qualifier (host constructor): addScalar() builds the functor in host code.
+        explicit AddScalar(S scalar) : val(scalar) {}
+
+        __device__ __forceinline__ D operator ()(T a) const { return saturate_cast<D>(a + val); }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Traits for AddScalar are keyed on the input element size (T) and the output element size (D); S plays no role.
+    template <typename T, typename S, typename D>
+    struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> {};
+}}}
+
+namespace arithm
+{
+    // Adds val (converted to S) to src1 viewed as PtrStepSz<T>, writing to dst viewed as PtrStepSz<D>; mask used only if mask.data is set.
+    template <typename T, typename S, typename D>
+    void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    {
+        const PtrStepSz<T> input(src1);
+        const PtrStepSz<D> output(dst);
+        AddScalar<T, S, D> functor(static_cast<S>(val));
+        if (mask.data) cudev::transform(input, output, functor, mask, stream);
+        else           cudev::transform(input, output, functor, WithOutMask(), stream);
+    }
+    // Explicit instantiations <T, S, D>; the commented-out combinations are not built.
+    template void addScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    template void addScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void addScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void addScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void addScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/add_weighted.cu b/modules/gpuarithm/src/cuda/add_weighted.cu
new file mode 100644
index 000000000..88d8de951
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/add_weighted.cu
@@ -0,0 +1,364 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Compile-time switch for the coefficient precision used by AddWeighted:
+    // UseDouble_<T>::value is 1 only for T == double, and UseDouble ORs it over T1, T2 and D.
+    template <typename T> struct UseDouble_ { enum {value = 0}; };
+    template <> struct UseDouble_<double> { enum {value = 1}; };
+
+    template <typename T1, typename T2, typename D> struct UseDouble
+    {
+        enum {value = (UseDouble_<T1>::value || UseDouble_<T2>::value || UseDouble_<D>::value)};
+    };
+
+    // Declared only; the bool parameter selects one of the two partial specializations below.
+    template <typename T1, typename T2, typename D, bool useDouble> struct AddWeighted_;
+
+    // useDouble == false: the coefficients are converted to float in the (host) constructor.
+    template <typename T1, typename T2, typename D>
+    struct AddWeighted_<T1, T2, D, false> : binary_function<T1, T2, D>
+    {
+        float alpha, beta, gamma;
+
+        AddWeighted_(double alphaArg, double betaArg, double gammaArg)
+            : alpha(static_cast<float>(alphaArg)), beta(static_cast<float>(betaArg)), gamma(static_cast<float>(gammaArg)) {}
+
+        // Evaluates a * alpha + b * beta + gamma and saturates the result into D.
+        __device__ __forceinline__ D operator ()(T1 a, T2 b) const { return saturate_cast<D>(a * alpha + b * beta + gamma); }
+    };
+
+    // useDouble == true: the coefficients are stored as double, same formula as above.
+    template <typename T1, typename T2, typename D>
+    struct AddWeighted_<T1, T2, D, true> : binary_function<T1, T2, D>
+    {
+        double alpha, beta, gamma;
+
+        AddWeighted_(double alphaArg, double betaArg, double gammaArg) : alpha(alphaArg), beta(betaArg), gamma(gammaArg) {}
+
+        __device__ __forceinline__ D operator ()(T1 a, T2 b) const { return saturate_cast<D>(a * alpha + b * beta + gamma); }
+    };
+
+    // Functor used by addWeighted(): picks the float or double implementation via UseDouble<T1, T2, D>.
+    template <typename T1, typename T2, typename D>
+    struct AddWeighted : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>
+    {
+        AddWeighted(double alpha_, double beta_, double gamma_)
+            : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>(alpha_, beta_, gamma_) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // General case (the two source element sizes differ): fall back to DefaultTransformFunctorTraits.
+    template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size>
+    struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > {};
+
+    // Both sources have the same element size: inherit from ArithmFuncTraits<src_size, dst_size>.
+    template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size>
+    struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : arithm::ArithmFuncTraits<src_size, dst_size> {};
+
+    template <typename T1, typename T2, typename D>
+    struct TransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)> {};
+}}}
+
+namespace arithm
+{
+    template <typename T1, typename T2, typename D>
+    void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream)
+    {
+        // Build the weighted-sum functor, then run it unmasked with src1/src2/dst viewed as T1/T2/D buffers.
+        AddWeighted<T1, T2, D> weightedSum(alpha, beta, gamma);
+        cudev::transform(PtrStepSz<T1>(src1), PtrStepSz<T2>(src2), PtrStepSz<D>(dst), weightedSum, WithOutMask(), stream);
+    }
+    // Explicit instantiations <T1, T2, D>. Group: T1 = uchar, T2 = uchar, one line per destination type D.
+    template void addWeighted<uchar, uchar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, uchar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, uchar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, uchar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, uchar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, uchar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, uchar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    // Group: T1 = uchar, T2 = schar.
+    template void addWeighted<uchar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    // Group: T1 = uchar, T2 = ushort.
+    template void addWeighted<uchar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    // Group: T1 = uchar, T2 = short.
+    template void addWeighted<uchar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    // Group: T1 = uchar, T2 = int.
+    template void addWeighted<uchar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    // Group: T1 = uchar, T2 = float.
+    template void addWeighted<uchar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<uchar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<uchar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+
+
+    template void addWeighted<schar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<schar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<schar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<schar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<schar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<schar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<schar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+
+
+    template void addWeighted<ushort, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<ushort, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<ushort, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<ushort, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<ushort, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<ushort, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+
+
+    template void addWeighted<short, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<short, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<short, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<short, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<short, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+
+
+    template void addWeighted<int, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<int, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<int, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<int, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+
+
+    template void addWeighted<float, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+    template void addWeighted<float, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<float, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+
+
+
+    template void addWeighted<double, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<double, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<double, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<double, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<double, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<double, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    template void addWeighted<double, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuarithm/src/cuda/arithm_func_traits.hpp b/modules/gpuarithm/src/cuda/arithm_func_traits.hpp
new file mode 100644
index 000000000..051fd9ff0
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/arithm_func_traits.hpp
@@ -0,0 +1,145 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __ARITHM_FUNC_TRAITS_HPP__
+#define __ARITHM_FUNC_TRAITS_HPP__
+
+#include <cstddef>
+
+namespace arithm
+{
+    // ArithmFuncTraits<src_size, dst_size>: compile-time constants exposed as enumerators.
+    //   simple_block_dim_x / simple_block_dim_y = 32 / 8 for every size pair
+    //   smart_block_dim_x  / smart_block_dim_y  = 32 / 8 for every size pair
+    //   smart_shift = 4 when both sizes are in {1, 2, 4} (explicit specializations below),
+    //                 1 for any other pair (primary template)
+    // NOTE(review): the sizes are presumably sizeof() of the source/destination element
+    // types and the values presumably tune the transform kernel launch -- confirm at use sites.
+    template <size_t src_size, size_t dst_size> struct ArithmFuncTraits
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 1
+        };
+    };
+
+    // src_size == 1, dst_size in {1, 2, 4}
+    template <> struct ArithmFuncTraits<1, 1>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+    template <> struct ArithmFuncTraits<1, 2>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+    template <> struct ArithmFuncTraits<1, 4>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+
+    // src_size == 2, dst_size in {1, 2, 4}
+    template <> struct ArithmFuncTraits<2, 1>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+    template <> struct ArithmFuncTraits<2, 2>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+    template <> struct ArithmFuncTraits<2, 4>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+
+    // src_size == 4, dst_size in {1, 2, 4}
+    template <> struct ArithmFuncTraits<4, 1>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+    template <> struct ArithmFuncTraits<4, 2>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+    template <> struct ArithmFuncTraits<4, 4>
+    {
+        enum
+        {
+            simple_block_dim_x = 32, simple_block_dim_y = 8,
+            smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = 4
+        };
+    };
+}
+
+#endif // __ARITHM_FUNC_TRAITS_HPP__
diff --git a/modules/gpuarithm/src/cuda/bitwise_mat.cu b/modules/gpuarithm/src/cuda/bitwise_mat.cu
new file mode 100644
index 000000000..40222214e
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/bitwise_mat.cu
@@ -0,0 +1,126 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Functor traits for the bitwise operators. Every specialization is an empty struct that
+    // inherits arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>, so it carries that struct's
+    // constants (presumably read by cudev::transform when it runs the functor -- confirm).
+
+    // Traits for bit_not<T>.
+    template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+
+    // Traits for bit_and<T>.
+    template <typename T> struct TransformFunctorTraits< bit_and<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+
+    // Traits for bit_or<T>.
+    template <typename T> struct TransformFunctorTraits< bit_or<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+
+    // Traits for bit_xor<T>.
+    template <typename T> struct TransformFunctorTraits< bit_xor<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+}}}
+
+namespace arithm
+{
+    // Bitwise NOT: views src and dst as PtrStepSz<T>; a null mask.data selects the WithOutMask() transform.
+    template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    {
+        if (!mask.data)
+            cudev::transform(PtrStepSz<T>(src), PtrStepSz<T>(dst), bit_not<T>(), WithOutMask(), stream);
+        else
+            cudev::transform(PtrStepSz<T>(src), PtrStepSz<T>(dst), bit_not<T>(), mask, stream);
+    }
+    // Bitwise AND of src1 and src2 into dst, all viewed as PtrStepSz<T>; mask handling as in bitMatNot.
+    template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    {
+        if (!mask.data)
+            cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(src2), PtrStepSz<T>(dst), bit_and<T>(), WithOutMask(), stream);
+        else
+            cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(src2), PtrStepSz<T>(dst), bit_and<T>(), mask, stream);
+    }
+    // Bitwise OR of src1 and src2 into dst, all viewed as PtrStepSz<T>; mask handling as in bitMatNot.
+    template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    {
+        if (!mask.data)
+            cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(src2), PtrStepSz<T>(dst), bit_or<T>(), WithOutMask(), stream);
+        else
+            cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(src2), PtrStepSz<T>(dst), bit_or<T>(), mask, stream);
+    }
+    // Bitwise XOR of src1 and src2 into dst, all viewed as PtrStepSz<T>; mask handling as in bitMatNot.
+    template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    {
+        if (!mask.data)
+            cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(src2), PtrStepSz<T>(dst), bit_xor<T>(), WithOutMask(), stream);
+        else
+            cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(src2), PtrStepSz<T>(dst), bit_xor<T>(), mask, stream);
+    }
+    // Explicit instantiations, grouped by element type (uchar, ushort, uint).
+    template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/bitwise_scalar.cu b/modules/gpuarithm/src/cuda/bitwise_scalar.cu
new file mode 100644
index 000000000..145885024
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/bitwise_scalar.cu
@@ -0,0 +1,104 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Functor traits for the scalar-bound bitwise operators: each binder2nd<...> specialization
+    // is an empty struct inheriting arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>.
+
+    // Traits for binder2nd< bit_and<T> >.
+    template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+
+    // Traits for binder2nd< bit_or<T> >.
+    template <typename T> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+
+    // Traits for binder2nd< bit_xor<T> >.
+    template <typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+}}}
+
+namespace arithm
+{
+    // AND of every element of src1 (viewed as PtrStepSz<T>) with src2, bound as the functor's second argument; unmasked.
+    template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(dst), cv::gpu::cudev::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
+    }
+    // OR of every element of src1 (viewed as PtrStepSz<T>) with src2, bound as the functor's second argument; unmasked.
+    template <typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(dst), cv::gpu::cudev::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
+    }
+    // XOR of every element of src1 (viewed as PtrStepSz<T>) with src2, bound as the functor's second argument; unmasked.
+    template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(PtrStepSz<T>(src1), PtrStepSz<T>(dst), cv::gpu::cudev::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
+    }
+
+    // Explicit instantiations, listed by element type: uchar, ushort, int, unsigned int.
+    template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/cmp_mat.cu b/modules/gpuarithm/src/cuda/cmp_mat.cu
new file mode 100644
index 000000000..9cf9787a9
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/cmp_mat.cu
@@ -0,0 +1,206 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Functor wrapping vcmpeq4() on two uint operands (presumably four packed bytes each; see simd_functions.hpp).
+    struct VCmpEq4 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const { return vcmpeq4(a, b); }
+
+        // Instances are created in host code and handed to cudev::transform, so the
+        // constructors must be callable from both host and device.
+        __host__ __device__ __forceinline__ VCmpEq4() {}
+        __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
+    };
+    // Functor wrapping vcmpne4() on two uint operands (presumably four packed bytes each; see simd_functions.hpp).
+    struct VCmpNe4 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const { return vcmpne4(a, b); }
+
+        // Instances are created in host code and handed to cudev::transform, so the
+        // constructors must be callable from both host and device.
+        __host__ __device__ __forceinline__ VCmpNe4() {}
+        __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
+    };
+    // Functor wrapping vcmplt4() on two uint operands (presumably four packed bytes each; see simd_functions.hpp).
+    struct VCmpLt4 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const { return vcmplt4(a, b); }
+
+        // Instances are created in host code and handed to cudev::transform, so the
+        // constructors must be callable from both host and device.
+        __host__ __device__ __forceinline__ VCmpLt4() {}
+        __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
+    };
+    // Functor wrapping vcmple4() on two uint operands (presumably four packed bytes each; see simd_functions.hpp).
+    struct VCmpLe4 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const { return vcmple4(a, b); }
+
+        // Instances are created in host code and handed to cudev::transform, so the
+        // constructors must be callable from both host and device.
+        __host__ __device__ __forceinline__ VCmpLe4() {}
+        __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
+    };
+    // Adapts a comparison functor Op on two T operands to a uchar result.
+    template <class Op, typename T>
+    struct Cmp : binary_function<T, T, uchar>
+    {
+        __device__ __forceinline__ uchar operator()(T a, T b) const
+        {
+            Op op;
+            return -op(a, b); // negation turns a true/1 result into 255 once converted to uchar; 0 stays 0
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // The packed comparison functors take and return uint, so both ArithmFuncTraits
+    // arguments are sizeof(uint).
+    template <> struct TransformFunctorTraits< arithm::VCmpEq4 >
+        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+    template <> struct TransformFunctorTraits< arithm::VCmpNe4 >
+        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+    template <> struct TransformFunctorTraits< arithm::VCmpLt4 >
+        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+    template <> struct TransformFunctorTraits< arithm::VCmpLe4 >
+        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+
+    // Cmp<Op, T> takes T operands and returns uchar, hence sizeof(T) paired with
+    // sizeof(uchar).
+    template <class Op, typename T>
+    struct TransformFunctorTraits< arithm::Cmp<Op, T> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)> {};
+}}}
+
+namespace arithm
+{
+    void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream); // VCmpEq4 wraps vcmpeq4(); no mask
+    }
+    void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream); // VCmpNe4 wraps vcmpne4(); no mask
+    }
+    void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream); // VCmpLt4 wraps vcmplt4(); no mask
+    }
+    void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream); // VCmpLe4 wraps vcmple4(); no mask
+    }
+    // Generic path: views src1/src2 as PtrStepSz<T> and writes the uchar results of Cmp<Op<T>, T> into dst, unmasked.
+    template <template <typename> class Op, typename T>
+    void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        Cmp<Op<T>, T> op;
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
+    }
+    // Per-operator wrappers: forward to cmpMat with Op = equal_to, not_equal_to, less or less_equal.
+    template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cmpMat<equal_to, T>(src1, src2, dst, stream);
+    }
+    template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cmpMat<not_equal_to, T>(src1, src2, dst, stream);
+    }
+    template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cmpMat<less, T>(src1, src2, dst, stream);
+    }
+    template <typename T> void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cmpMat<less_equal, T>(src1, src2, dst, stream);
+    }
+    // Explicit instantiations of the four wrappers for uchar, schar, ushort, short, int, float and double.
+    template void cmpMatEq<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatEq<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatEq<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatEq<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatEq<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatEq<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatEq<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+
+    template void cmpMatNe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatNe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatNe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatNe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatNe<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatNe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatNe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+
+    template void cmpMatLt<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLt<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLt<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLt<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLt<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLt<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLt<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+
+    template void cmpMatLe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLe<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void cmpMatLe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/cmp_scalar.cu b/modules/gpuarithm/src/cuda/cmp_scalar.cu
new file mode 100644
index 000000000..09ff0e6e7
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/cmp_scalar.cu
@@ -0,0 +1,284 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    template <class Op, typename T>
+    struct Cmp : binary_function<T, T, uchar>
+    {
+        __device__ __forceinline__ uchar operator()(T a, T b) const
+        {
+            Op op;
+            return -op(a, b); // negation turns a true/1 result into 255 once converted to uchar; 0 stays 0
+        }
+    };
+    // TYPE_VEC(type, cn) expands to TypeVec<type, cn>::vec_type; it is #undef'd at the end of this namespace.
+#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type
+    // CmpScalar<Op, T, cn>: unary functor comparing a cn-channel value against a stored one; specialized for cn = 1..4.
+    template <class Op, typename T, int cn> struct CmpScalar;
+    template <class Op, typename T>
+    struct CmpScalar<Op, T, 1> : unary_function<T, uchar>
+    {
+        const T val;
+        // Host-only constructor: stores the value every source element is compared against.
+        __host__ explicit CmpScalar(T val_) : val(val_) {}
+        // Returns the Cmp<Op, T> result for (src, val).
+        __device__ __forceinline__ uchar operator()(T src) const
+        {
+            Cmp<Op, T> op;
+            return op(src, val);
+        }
+    };
+    template <class Op, typename T>
+    struct CmpScalar<Op, T, 2> : unary_function<TYPE_VEC(T, 2), TYPE_VEC(uchar, 2)>
+    {
+        const TYPE_VEC(T, 2) val;
+
+        __host__ explicit CmpScalar(TYPE_VEC(T, 2) val_) : val(val_) {}
+        // Compares the x and y channels separately and packs both uchar results with VecTraits<...>::make.
+        __device__ __forceinline__ TYPE_VEC(uchar, 2) operator()(const TYPE_VEC(T, 2) & src) const
+        {
+            Cmp<Op, T> op;
+            return VecTraits<TYPE_VEC(uchar, 2)>::make(op(src.x, val.x), op(src.y, val.y));
+        }
+    };
+    template <class Op, typename T>
+    struct CmpScalar<Op, T, 3> : unary_function<TYPE_VEC(T, 3), TYPE_VEC(uchar, 3)>
+    {
+        const TYPE_VEC(T, 3) val;
+
+        __host__ explicit CmpScalar(TYPE_VEC(T, 3) val_) : val(val_) {}
+        // Compares the x, y and z channels separately and packs the uchar results with VecTraits<...>::make.
+        __device__ __forceinline__ TYPE_VEC(uchar, 3) operator()(const TYPE_VEC(T, 3) & src) const
+        {
+            Cmp<Op, T> op;
+            return VecTraits<TYPE_VEC(uchar, 3)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z));
+        }
+    };
+    template <class Op, typename T>
+    struct CmpScalar<Op, T, 4> : unary_function<TYPE_VEC(T, 4), TYPE_VEC(uchar, 4)>
+    {
+        const TYPE_VEC(T, 4) val;
+
+        __host__ explicit CmpScalar(TYPE_VEC(T, 4) val_) : val(val_) {}
+        // Compares the x, y, z and w channels separately and packs the uchar results with VecTraits<...>::make.
+        __device__ __forceinline__ TYPE_VEC(uchar, 4) operator()(const TYPE_VEC(T, 4) & src) const
+        {
+            Cmp<Op, T> op;
+            return VecTraits<TYPE_VEC(uchar, 4)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w));
+        }
+    };
+
+#undef TYPE_VEC
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Only the single-channel CmpScalar gets ArithmFuncTraits-based transform traits here;
+    // the 2-, 3- and 4-channel functors have no specialization in this file.
+    template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)> {};
+}}}
+
+namespace arithm
+{
+    template <template <typename> class Op, typename T, int cn>
+    void cmpScalar(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream)
+    {
+        typedef typename TypeVec<T, cn>::vec_type pixel_t;
+        typedef typename TypeVec<uchar, cn>::vec_type result_t;
+        // Convert all four scalar components to T, then build the cn-channel comparison value from them.
+        T channels[] = {static_cast<T>(val[0]), static_cast<T>(val[1]), static_cast<T>(val[2]), static_cast<T>(val[3])};
+        pixel_t scalar = VecTraits<pixel_t>::make(channels);
+        // Run the per-channel comparison functor over the whole image, unmasked.
+        CmpScalar<Op<T>, T, cn> functor(scalar);
+        cudev::transform(PtrStepSz<pixel_t>(src), PtrStepSz<result_t>(dst), functor, WithOutMask(), stream);
+    }
+    // Per-operator entry points: each indexes a table of cmpScalar instantiations by the channel count cn.
+    template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
+    {
+        typedef void (*impl_t)(PtrStepSzb, double[4], PtrStepSzb, cudaStream_t);
+        static const impl_t table[] =
+        {
+            0, // no implementation for cn == 0
+            cmpScalar<equal_to, T, 1>,
+            cmpScalar<equal_to, T, 2>,
+            cmpScalar<equal_to, T, 3>,
+            cmpScalar<equal_to, T, 4>
+        };
+        // No range check here: cn must be 1..4 (slot 0 is null, the table has five entries).
+        table[cn](src, val, dst, stream);
+    }
+    template <typename T> void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
+    {
+        typedef void (*impl_t)(PtrStepSzb, double[4], PtrStepSzb, cudaStream_t);
+        static const impl_t table[] =
+        {
+            0, // no implementation for cn == 0
+            cmpScalar<not_equal_to, T, 1>,
+            cmpScalar<not_equal_to, T, 2>,
+            cmpScalar<not_equal_to, T, 3>,
+            cmpScalar<not_equal_to, T, 4>
+        };
+        // No range check here: cn must be 1..4 (slot 0 is null, the table has five entries).
+        table[cn](src, val, dst, stream);
+    }
+    template <typename T> void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
+    {
+        typedef void (*impl_t)(PtrStepSzb, double[4], PtrStepSzb, cudaStream_t);
+        static const impl_t table[] =
+        {
+            0, // no implementation for cn == 0
+            cmpScalar<less, T, 1>,
+            cmpScalar<less, T, 2>,
+            cmpScalar<less, T, 3>,
+            cmpScalar<less, T, 4>
+        };
+        // No range check here: cn must be 1..4 (slot 0 is null, the table has five entries).
+        table[cn](src, val, dst, stream);
+    }
+    template <typename T> void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
+    {
+        typedef void (*impl_t)(PtrStepSzb, double[4], PtrStepSzb, cudaStream_t);
+        static const impl_t table[] =
+        {
+            0, // no implementation for cn == 0
+            cmpScalar<less_equal, T, 1>,
+            cmpScalar<less_equal, T, 2>,
+            cmpScalar<less_equal, T, 3>,
+            cmpScalar<less_equal, T, 4>
+        };
+        // No range check here: cn must be 1..4 (slot 0 is null, the table has five entries).
+        table[cn](src, val, dst, stream);
+    }
+    template <typename T> void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
+    {
+        typedef void (*impl_t)(PtrStepSzb, double[4], PtrStepSzb, cudaStream_t);
+        static const impl_t table[] =
+        {
+            0, // no implementation for cn == 0
+            cmpScalar<greater, T, 1>,
+            cmpScalar<greater, T, 2>,
+            cmpScalar<greater, T, 3>,
+            cmpScalar<greater, T, 4>
+        };
+        // No range check here: cn must be 1..4 (slot 0 is null, the table has five entries).
+        table[cn](src, val, dst, stream);
+    }
+    template <typename T> void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
+    {
+        typedef void (*impl_t)(PtrStepSzb, double[4], PtrStepSzb, cudaStream_t);
+        static const impl_t table[] =
+        {
+            0, // no implementation for cn == 0
+            cmpScalar<greater_equal, T, 1>,
+            cmpScalar<greater_equal, T, 2>,
+            cmpScalar<greater_equal, T, 3>,
+            cmpScalar<greater_equal, T, 4>
+        };
+        // No range check here: cn must be 1..4 (slot 0 is null, the table has five entries).
+        table[cn](src, val, dst, stream);
+    }
+    // Explicit instantiations of the six entry points for uchar, schar, ushort, short, int, float and double.
+    template void cmpScalarEq<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarEq<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarEq<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarEq<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarEq<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarEq<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarEq<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+
+    template void cmpScalarNe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarNe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarNe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarNe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarNe<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarNe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarNe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+
+    template void cmpScalarLt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLt<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+
+    template void cmpScalarLe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLe<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarLe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+
+    template void cmpScalarGt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGt<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+
+    template void cmpScalarGe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGe<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+    template void cmpScalarGe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/countnonzero.cu b/modules/gpuarithm/src/cuda/countnonzero.cu
new file mode 100644
index 000000000..837392149
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/countnonzero.cu
@@ -0,0 +1,175 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/reduce.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace countNonZero
+{
+    __device__ unsigned int blocks_finished = 0; // ticket counter for the pre-sm_20 path of kernel(): bumped once per block, reset to 0 by the block that draws the last ticket
+
+    template <int BLOCK_SIZE, typename T> // BLOCK_SIZE sizes the shared scratch array; T is the element type of src
+    __global__ void kernel(const PtrStepSz<T> src, unsigned int* count, const int twidth, const int theight)
+    {
+        __shared__ unsigned int scount[BLOCK_SIZE];
+        // Counts the non-zero elements of src. Each thread visits at most twidth x theight elements, stepping by the block dimensions.
+        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
+        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
+
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x; // linear index of this thread within its block
+
+        unsigned int mycount = 0; // per-thread tally of non-zero elements
+
+        for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
+        {
+            const T* ptr = src.ptr(y);
+
+            for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
+            {
+                const T srcVal = ptr[x];
+
+                mycount += (srcVal != 0);
+            }
+        }
+        // Combine the per-thread tallies; thread 0's mycount is used as the block total below (presumably what reduce() leaves there - confirm in reduce.hpp).
+        cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
+
+    #if __CUDA_ARCH__ >= 200
+        if (tid == 0) // one thread per block adds the block total into *count (the host zeroes that slot before the launch)
+            ::atomicAdd(count, mycount);
+    #else
+        __shared__ bool is_last; // written by thread 0: true only in the block that draws the final ticket
+        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
+
+        if (tid == 0)
+        {
+            count[bid] = mycount;
+            // Fence before taking a ticket so the store above is visible to the block that later sums the slots.
+            __threadfence();
+
+            unsigned int ticket = ::atomicInc(&blocks_finished, gridDim.x * gridDim.y);
+            is_last = (ticket == gridDim.x * gridDim.y - 1);
+        }
+
+        __syncthreads();
+
+        if (is_last)
+        {
+            mycount = tid < gridDim.x * gridDim.y ? count[tid] : 0; // one slot per thread: only correct while gridDim.x * gridDim.y does not exceed the threads in a block
+
+            cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
+
+            if (tid == 0)
+            {
+                count[0] = mycount;
+                // Re-arm the ticket counter so the next launch starts from zero.
+                blocks_finished = 0;
+            }
+        }
+    #endif
+    }
+
+    const int threads_x = 32; // thread-block width used by getLaunchCfg(); run() passes threads_x * threads_y as the kernel's BLOCK_SIZE
+    const int threads_y = 8;  // thread-block height used by getLaunchCfg()
+
+    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
+    {
+        // Fixed threads_x x threads_y block; both grid extents are sized from the block's thread count.
+        block = dim3(threads_x, threads_y);
+        const int blockArea = block.x * block.y;
+        grid = dim3(divUp(cols, blockArea), divUp(rows, blockArea));
+        // Clamp each grid extent to the matching block extent, so grid.x * grid.y never exceeds block.x * block.y.
+        grid.x = ::min(grid.x, block.x);
+        grid.y = ::min(grid.y, block.y);
+    }
+
+    void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
+    {
+        // Scratch buffer extent: a single row with one int-sized slot per block of the launch grid.
+        dim3 threads, blocks;
+        getLaunchCfg(cols, rows, threads, blocks);
+        bufcols = blocks.x * blocks.y * sizeof(int);
+        bufrows = 1;
+    }
+
+    template <typename T>
+    int run(const PtrStepSzb src, PtrStep<unsigned int> buf)
+    {
+        // Host entry point: counts the non-zero elements of src (viewed as T) using buf as device scratch space.
+        dim3 threads, blocks;
+        getLaunchCfg(src.cols, src.rows, threads, blocks);
+
+        // Elements each thread walks per axis: the per-block share of columns/rows, split across the block's threads.
+        const int twidth = divUp(divUp(src.cols, blocks.x), threads.x);
+        const int theight = divUp(divUp(src.rows, blocks.y), threads.y);
+
+        // Only the first slot is cleared before the launch.
+        unsigned int* d_count = buf.ptr(0);
+        cudaSafeCall( cudaMemset(d_count, 0, sizeof(unsigned int)) );
+
+        // No stream argument is given, so the launch is followed by a device-wide synchronize before the read-back.
+        kernel<threads_x * threads_y><<<blocks, threads>>>(PtrStepSz<T>(src), d_count, twidth, theight);
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
+        unsigned int h_count;
+        cudaSafeCall(cudaMemcpy(&h_count, d_count, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+        return h_count;
+    }
+
+    template int run<uchar >(const PtrStepSzb src, PtrStep<unsigned int> buf); // explicit instantiations of run<T>, one per element type listed below
+    template int run<schar >(const PtrStepSzb src, PtrStep<unsigned int> buf);
+    template int run<ushort>(const PtrStepSzb src, PtrStep<unsigned int> buf);
+    template int run<short >(const PtrStepSzb src, PtrStep<unsigned int> buf);
+    template int run<int   >(const PtrStepSzb src, PtrStep<unsigned int> buf);
+    template int run<float >(const PtrStepSzb src, PtrStep<unsigned int> buf);
+    template int run<double>(const PtrStepSzb src, PtrStep<unsigned int> buf);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/div_inv.cu b/modules/gpuarithm/src/cuda/div_inv.cu
new file mode 100644
index 000000000..bda3df30b
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/div_inv.cu
@@ -0,0 +1,144 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Unary functor: maps a to val / a saturated to D; a zero input maps to zero instead of dividing.
+    template <typename T, typename S, typename D> struct DivInv : unary_function<T, D>
+    {
+        S val; // numerator, held in the intermediate type S
+        explicit DivInv(S val_) : val(val_) {}
+        __device__ __forceinline__ D operator ()(T a) const
+        {
+            if (a == 0) return 0;
+            return saturate_cast<D>(val / a);
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for DivInv are inherited from ArithmFuncTraits, keyed on sizeof(T) and sizeof(D).
+    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivInv<T, S, D> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> {};
+}}}
+
+namespace arithm
+{
+    template <typename T, typename S, typename D>
+    void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
+    {
+        // Applies DivInv (val narrowed to S) to every element of src1 viewed as T, writing into dst viewed as D; no mask.
+        cudev::transform(PtrStepSz<T>(src1), PtrStepSz<D>(dst), DivInv<T, S, D>(static_cast<S>(val)), WithOutMask(), stream);
+    }
+
+    template void divInv<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); // explicit instantiations as <source T, intermediate S, destination D>; commented-out combinations are not built
+    template void divInv<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    template void divInv<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divInv<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divInv<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divInv<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divInv<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divInv<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divInv<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divInv<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/div_mat.cu b/modules/gpuarithm/src/cuda/div_mat.cu
new file mode 100644
index 000000000..9d50dc7b6
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/div_mat.cu
@@ -0,0 +1,230 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    struct Div_8uc4_32f : binary_function<uint, float, uint>
+    {
+        // Treats `a` as four packed 8-bit lanes, scales each lane by 1 / b and repacks the
+        // saturated results into one uint; a zero divisor produces an all-zero word.
+        __device__ __forceinline__ uint operator ()(uint a, float b) const
+        {
+            if (b == 0)
+                return 0;
+
+            // The reciprocal is computed once and each lane is multiplied by it.
+            const float inv = 1.0f / b;
+            const uint lane0 = (saturate_cast<uchar>((0xffu & (a      )) * inv)      );
+            const uint lane1 = (saturate_cast<uchar>((0xffu & (a >>  8)) * inv) <<  8);
+            const uint lane2 = (saturate_cast<uchar>((0xffu & (a >> 16)) * inv) << 16);
+            const uint lane3 = (saturate_cast<uchar>((0xffu & (a >> 24)) * inv) << 24);
+            return lane0 | lane1 | lane2 | lane3;
+        }
+    };
+
+    struct Div_16sc4_32f : binary_function<short4, float, short4>
+    {
+        __device__ __forceinline__ short4 operator ()(short4 a, float b) const
+        {
+            // Each component of a is divided by b and saturated to short; a zero divisor yields (0, 0, 0, 0).
+            if (b == 0) return make_short4(0, 0, 0, 0);
+            return make_short4(saturate_cast<short>(a.x / b), saturate_cast<short>(a.y / b), saturate_cast<short>(a.z / b), saturate_cast<short>(a.w / b));
+        }
+    };
+
+    template <typename T, typename D> struct Div : binary_function<T, T, D> // generic case: a / b with the built-in operator on T operands, saturated to D
+    {
+        __device__ __forceinline__ D operator ()(T a, T b) const
+        {
+            return b != 0 ? saturate_cast<D>(a / b) : 0; // a zero divisor yields 0 instead of dividing
+        }
+        // divMat() default-constructs and copies this functor in host code, so both constructors must be callable on host and device.
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
+    };
+    template <typename T> struct Div<T, float> : binary_function<T, T, float> // float destination: a is widened to float before dividing, no saturation
+    {
+        __device__ __forceinline__ float operator ()(T a, T b) const
+        {
+            return b != 0 ? static_cast<float>(a) / b : 0;
+        }
+        // Stateless functor; constructors are host/device for the same reason as in the primary template.
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
+    };
+    template <typename T> struct Div<T, double> : binary_function<T, T, double> // double destination: a is widened to double before dividing, no saturation
+    {
+        __device__ __forceinline__ double operator ()(T a, T b) const
+        {
+            return b != 0 ? static_cast<double>(a) / b : 0;
+        }
+        // Stateless functor; constructors are host/device for the same reason as in the primary template.
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
+    };
+
+    // Binary functor: computes scale * a / b (evaluated left to right) saturated to D; a zero divisor yields zero.
+    template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
+    {
+        S scale; // multiplier, held in the intermediate type S
+        explicit DivScale(S scale_) : scale(scale_) {}
+        __device__ __forceinline__ D operator ()(T a, T b) const
+        {
+            if (b == 0) return 0;
+            return saturate_cast<D>(scale * a / b);
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Packed 8-bit x4 division: traits keyed on the uint-sized source and destination words.
+    template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f>
+        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+
+    // Unscaled division: traits keyed on sizeof(T) and sizeof(D).
+    template <typename T, typename D> struct TransformFunctorTraits< arithm::Div<T, D> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> {};
+
+    // Scaled division: same key as the unscaled case; the intermediate type S does not take part.
+    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScale<T, S, D> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> {};
+}}}
+
+namespace arithm
+{
+    void divMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    { // Applies Div_8uc4_32f element-wise: each uint of src1 (four packed 8-bit values) is divided by the matching float of src2; no mask.
+        cudev::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
+    }
+
+    void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
+    { // Applies Div_16sc4_32f element-wise: each short4 of src1 is divided component-wise by the matching float of src2; no mask.
+        cudev::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
+    }
+
+    template <typename T, typename S, typename D>
+    void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream)
+    {
+        // Typed views over the byte-typed descriptors: both sources as T, the destination as D.
+        PtrStepSz<T> lhs(src1), rhs(src2);
+        PtrStepSz<D> out(dst);
+
+        // A scale of exactly 1 uses the plain Div functor; any other value goes through
+        // DivScale with the factor narrowed to S. No mask is applied in either case.
+        if (scale != 1)
+            cudev::transform(lhs, rhs, out, DivScale<T, S, D>(static_cast<S>(scale)), WithOutMask(), stream);
+        else
+            cudev::transform(lhs, rhs, out, Div<T, D>(), WithOutMask(), stream);
+    }
+
+    template void divMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); // explicit instantiations as <source T, intermediate S, destination D>; commented-out combinations are not built
+    template void divMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    template void divMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void divMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void divMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void divMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void divMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void divMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void divMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void divMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/div_scalar.cu b/modules/gpuarithm/src/cuda/div_scalar.cu
new file mode 100644
index 000000000..b176cfa2c
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/div_scalar.cu
@@ -0,0 +1,144 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Element functor: divides a T input by the stored scalar of type S and saturates the quotient to D.
+    template <typename T, typename S, typename D>
+    struct DivScalar : unary_function<T, D>
+    {
+        S val;
+        explicit DivScalar(S divisor) : val(divisor) {}
+        __device__ __forceinline__ D operator ()(T elem) const
+        {
+            return saturate_cast<D>(elem / val);
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for arithm::DivScalar are inherited from ArithmFuncTraits, keyed on sizeof(T) and sizeof(D).
+    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalar<T, S, D> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> {};
+}}}
+
+namespace arithm
+{
+    // Host launcher: builds a DivScalar functor from val (converted to S) and runs it through cudev::transform with no mask.
+    template <typename T, typename S, typename D> void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
+    {
+        DivScalar<T, S, D> divideBy(static_cast<S>(val));
+        cudev::transform(static_cast< PtrStepSz<T> >(src1), static_cast< PtrStepSz<D> >(dst), divideBy, WithOutMask(), stream);
+    }
+
+    template void divScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    template void divScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void divScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void divScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void divScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/math.cu b/modules/gpuarithm/src/cuda/math.cu
new file mode 100644
index 000000000..30093d51c
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/math.cu
@@ -0,0 +1,302 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+#include "opencv2/core/cuda/limits.hpp"
+#include "opencv2/core/cuda/type_traits.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+//////////////////////////////////////////////////////////////////////////
+// absMat
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for abs_func<T>: inherited from ArithmFuncTraits with sizeof(T) for both size parameters.
+    template <typename T> struct TransformFunctorTraits< abs_func<T> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+}}}
+
+namespace arithm
+{
+    // Host launcher: views src and dst as PtrStepSz<T> and applies abs_func<T> through cudev::transform with no mask.
+    template <typename T> void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), abs_func<T>(), WithOutMask(), stream);
+    }
+
+    template void absMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void absMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void absMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void absMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void absMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void absMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void absMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// sqrMat
+
+namespace arithm
+{
+    // Element functor returning x * x saturated to T.
+    template <typename T> struct Sqr : unary_function<T, T>
+    {
+        __device__ __forceinline__ T operator ()(T x) const { return saturate_cast<T>(x * x); }
+
+        // Constructors are __host__ __device__ because the functor is created in host code (sqrMat)
+        // before being handed to cudev::transform; a __device__-only constructor is not callable there.
+        __host__ __device__ __forceinline__ Sqr() {}
+        __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for arithm::Sqr<T>: inherited from ArithmFuncTraits with sizeof(T) for both size parameters.
+    template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+}}}
+
+namespace arithm
+{
+    // Host launcher: views src and dst as PtrStepSz<T> and applies Sqr<T> through cudev::transform with no mask.
+    template <typename T> void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), Sqr<T>(), WithOutMask(), stream);
+    }
+
+    template void sqrMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// sqrtMat
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for sqrt_func<T>: inherited from ArithmFuncTraits with sizeof(T) for both size parameters.
+    template <typename T> struct TransformFunctorTraits< sqrt_func<T> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+}}}
+
+namespace arithm
+{
+    // Host launcher: views src and dst as PtrStepSz<T> and applies sqrt_func<T> through cudev::transform with no mask.
+    template <typename T> void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), sqrt_func<T>(), WithOutMask(), stream);
+    }
+
+    template void sqrtMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrtMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrtMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrtMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrtMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrtMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void sqrtMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// logMat
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for log_func<T>: inherited from ArithmFuncTraits with sizeof(T) for both size parameters.
+    template <typename T> struct TransformFunctorTraits< log_func<T> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+}}}
+
+namespace arithm
+{
+    // Host launcher: views src and dst as PtrStepSz<T> and applies log_func<T> through cudev::transform with no mask.
+    template <typename T> void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), log_func<T>(), WithOutMask(), stream);
+    }
+
+    template void logMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void logMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void logMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void logMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void logMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void logMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void logMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// expMat
+
+namespace arithm
+{
+    template <typename T> struct Exp : unary_function<T, T> // applies exp_func<T> to x and saturates the result back to T
+    {
+        __device__ __forceinline__ T operator ()(T x) const
+        {
+            exp_func<T> f;
+            return saturate_cast<T>(f(x));
+        }
+        // __host__ __device__: the functor is created in host code (expMat), where a __device__-only constructor is not callable.
+        __host__ __device__ __forceinline__ Exp() {}
+        __host__ __device__ __forceinline__ Exp(const Exp&) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for arithm::Exp<T>: inherited from ArithmFuncTraits with sizeof(T) for both size parameters.
+    template <typename T> struct TransformFunctorTraits< arithm::Exp<T> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+}}}
+
+namespace arithm
+{
+    // Host launcher: views src and dst as PtrStepSz<T> and applies Exp<T> through cudev::transform with no mask.
+    template <typename T> void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), Exp<T>(), WithOutMask(), stream);
+    }
+
+    template void expMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void expMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void expMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void expMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void expMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void expMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    template void expMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// pow
+
+namespace arithm
+{
+    // Primary template (numeric_limits<T>::is_signed == false): fast float power, saturated back to T.
+    template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
+    {
+        float power;
+        PowOp(double power_) : power(static_cast<float>(power_)) {}
+
+        __device__ __forceinline__ T operator()(T e) const
+        {
+            return saturate_cast<T>(__powf((float)e, power));
+        }
+    };
+    // Signed types: raise |e| to the power, then negate when e < 0 and the truncated exponent is odd.
+    template<typename T> struct PowOp<T, true> : unary_function<T, T>
+    {
+        float power;
+        PowOp(double power_) : power(static_cast<float>(power_)) {}
+
+        __device__ __forceinline__ T operator()(T e) const
+        {
+            // __powf yields NaN for a negative base, so the magnitude is used here and the sign is restored below.
+            T res = saturate_cast<T>(__powf(::fabsf((float)e), power));
+            if ((e < 0) && (1 & static_cast<int>(power)))
+                res *= -1;
+
+            return res;
+        }
+    };
+    // float: fast single-precision power of |e|; the sign of e is discarded.
+    template<> struct PowOp<float> : unary_function<float, float>
+    {
+        const float power;
+        PowOp(double power_) : power(static_cast<float>(power_)) {}
+
+        __device__ __forceinline__ float operator()(float e) const
+        {
+            return __powf(::fabs(e), power);
+        }
+    };
+    // double: double-precision power of |e|; the sign of e is discarded.
+    template<> struct PowOp<double> : unary_function<double, double>
+    {
+        double power;
+        PowOp(double power_) : power(power_) {}
+
+        __device__ __forceinline__ double operator()(double e) const
+        {
+            return ::pow(::fabs(e), power);
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for arithm::PowOp<T>: inherited from ArithmFuncTraits with sizeof(T) for both size parameters.
+    template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> >
+        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> {};
+}}}
+
+namespace arithm
+{
+    // Host launcher: views src and dst as PtrStepSz<T> and applies PowOp<T>(power) through cudev::transform with no mask.
+    template<typename T> void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), PowOp<T>(power), WithOutMask(), stream);
+    }
+
+    template void pow<uchar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
+    template void pow<schar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
+    template void pow<short>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
+    template void pow<ushort>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
+    template void pow<int>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
+    template void pow<float>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
+    template void pow<double>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/minmax.cu b/modules/gpuarithm/src/cuda/minmax.cu
new file mode 100644
index 000000000..dd1a1f39d
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/minmax.cu
@@ -0,0 +1,246 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/reduce.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+#include "opencv2/core/cuda/limits.hpp"
+#include "opencv2/core/cuda/utility.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace minMax
+{
+    __device__ unsigned int blocks_finished = 0; // ticket counter: incremented once per block and reset by the last block in GlobalReduce's pre-sm_20 path
+
+    // Maps a source element type to the accumulator type used by the reduction. To avoid shared-memory bank
+    // conflicts every integer type is widened to int (32 bits minimum); float and double keep their own type.
+    template <typename T> struct MinMaxTypeTraits;
+    template <> struct MinMaxTypeTraits<uchar> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<schar> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<ushort> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
+    template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
+
+    template <int BLOCK_SIZE, typename R>
+    struct GlobalReduce // folds one block's (mymin, mymax) into the global minval/maxval buffers
+    {
+        static __device__ void run(R& mymin, R& mymax, R* minval, R* maxval, int tid, int bid, R* sminval, R* smaxval)
+        {
+        #if __CUDA_ARCH__ >= 200
+            if (tid == 0) // one thread per block merges the block result into element 0 with atomics
+            {
+                Emulation::glob::atomicMin(minval, mymin);
+                Emulation::glob::atomicMax(maxval, mymax);
+            }
+        #else
+            __shared__ bool is_last;
+            // Thread 0 stores the block result in slot bid, then takes a ticket to learn whether this block finished last.
+            if (tid == 0)
+            {
+                minval[bid] = mymin;
+                maxval[bid] = mymax;
+
+                __threadfence(); // order the two stores above before the ticket increment below
+
+                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
+                is_last = (ticket == gridDim.x * gridDim.y - 1);
+            }
+
+            __syncthreads(); // make is_last visible to every thread of the block
+
+            if (is_last)
+            {
+                int idx = ::min(tid, gridDim.x * gridDim.y - 1);
+                // Threads past the block count re-read the last slot; assumes at most BLOCK_SIZE blocks (getLaunchCfg caps the grid).
+                mymin = minval[idx];
+                mymax = maxval[idx];
+
+                const minimum<R> minOp;
+                const maximum<R> maxOp;
+                cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
+
+                if (tid == 0)
+                {
+                    minval[0] = mymin;
+                    maxval[0] = mymax;
+                    // Reset the ticket counter so a later launch starts from zero.
+                    blocks_finished = 0;
+                }
+            }
+        #endif
+        }
+    };
+
+    // Each thread scans up to twidth x theight pixels (stepping by the block dimensions), the block reduces the
+    // per-thread extrema through shared memory, and GlobalReduce folds the block result into minval/maxval.
+    template <int BLOCK_SIZE, typename T, typename R, class Mask>
+    __global__ void kernel(const PtrStepSz<T> src, const Mask mask, R* minval, R* maxval, const int twidth, const int theight)
+    {
+        __shared__ R sminval[BLOCK_SIZE];
+        __shared__ R smaxval[BLOCK_SIZE];
+
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
+        const int firstCol = blockIdx.x * blockDim.x * twidth + threadIdx.x;
+        const int firstRow = blockIdx.y * blockDim.y * theight + threadIdx.y;
+
+        const minimum<R> minOp;
+        const maximum<R> maxOp;
+        R mymin = numeric_limits<R>::max();
+        R mymax = -numeric_limits<R>::max();
+
+        for (int i = 0, y = firstRow; i < theight && y < src.rows; ++i, y += blockDim.y)
+        {
+            const T* row = src.ptr(y);
+            for (int j = 0, x = firstCol; j < twidth && x < src.cols; ++j, x += blockDim.x)
+            {
+                if (!mask(y, x))
+                    continue;
+
+                const R pixel = row[x];
+                mymin = minOp(mymin, pixel);
+                mymax = maxOp(mymax, pixel);
+            }
+        }
+
+        // Block-level reduction of the per-thread minimum and maximum.
+        cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
+
+        // Combine this block's result with the results of the other blocks.
+        GlobalReduce<BLOCK_SIZE, R>::run(mymin, mymax, minval, maxval, tid, bid, sminval, smaxval);
+    }
+
+    // Thread-block shape; run() instantiates the kernel with BLOCK_SIZE = threads_x * threads_y.
+    const int threads_x = 32;
+    const int threads_y = 8;
+
+    // Chooses a threads_x x threads_y block and a grid of at most block.x x block.y blocks.
+    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
+    {
+        block = dim3(threads_x, threads_y);
+        const unsigned int perBlock = block.x * block.y;
+        grid = dim3(divUp(cols, perBlock), divUp(rows, perBlock));
+        grid.x = ::min(grid.x, block.x);
+        grid.y = ::min(grid.y, block.y);
+    }
+
+    // Reports the scratch buffer shape: 2 rows, each grid.x * grid.y * sizeof(double) wide, for the launch chosen by getLaunchCfg.
+    void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
+    {
+        dim3 threads, blocks;
+        getLaunchCfg(cols, rows, threads, blocks);
+        bufrows = 2;
+        bufcols = blocks.x * blocks.y * sizeof(double);
+    }
+
+    __global__ void setDefaultKernel(int* minval_buf, int* maxval_buf) // int accumulators: seed element 0 of each buffer
+    {
+        *minval_buf = numeric_limits<int>::max(); // any real value compares less than or equal to this
+        *maxval_buf = numeric_limits<int>::min(); // any real value compares greater than or equal to this
+    }
+    __global__ void setDefaultKernel(float* minval_buf, float* maxval_buf) // float accumulators
+    {
+        *minval_buf = numeric_limits<float>::max();
+        *maxval_buf = -numeric_limits<float>::max(); // negated max() is used as the starting maximum
+    }
+    __global__ void setDefaultKernel(double* minval_buf, double* maxval_buf) // double accumulators
+    {
+        *minval_buf = numeric_limits<double>::max();
+        *maxval_buf = -numeric_limits<double>::max(); // negated max() is used as the starting maximum
+    }
+
+    // Seeds element 0 of both result buffers with a one-thread launch of the setDefaultKernel overload matching R.
+    template <typename R> void setDefault(R* minval_buf, R* maxval_buf)
+    {
+        setDefaultKernel<<<1, 1>>>(minval_buf, maxval_buf);
+    }
+
+    // Host entry point: computes the minimum and maximum of src (restricted by mask when mask.data is set),
+    // using buf as device scratch space, and writes the results to *minval / *maxval as doubles.
+    template <typename T>
+    void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
+    {
+        typedef typename MinMaxTypeTraits<T>::best_type R;
+
+        dim3 block, grid;
+        getLaunchCfg(src.cols, src.rows, block, grid);
+        const int twidth = divUp(divUp(src.cols, grid.x), block.x);
+        const int theight = divUp(divUp(src.rows, grid.y), block.y);
+
+        // Row 0 of buf receives the minimum, row 1 the maximum.
+        R* minval_buf = (R*) buf.ptr(0);
+        R* maxval_buf = (R*) buf.ptr(1);
+        setDefault(minval_buf, maxval_buf);
+
+        if (!mask.data)
+            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, twidth, theight);
+        else
+            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, twidth, theight);
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
+
+        // Fetch element 0 of each row back to the host and widen it to double for the caller.
+        R hostMin, hostMax;
+        cudaSafeCall( cudaMemcpy(&hostMin, minval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&hostMax, maxval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
+        *minval = hostMin;
+        *maxval = hostMax;
+    }
+
+    template void run<uchar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
+    template void run<schar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
+    template void run<ushort>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
+    template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
+    template void run<int   >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
+    template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
+    template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/minmax_mat.cu b/modules/gpuarithm/src/cuda/minmax_mat.cu
new file mode 100644
index 000000000..6e9c24722
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/minmax_mat.cu
@@ -0,0 +1,228 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+//////////////////////////////////////////////////////////////////////////
+// min
+
+namespace arithm
+{
+    struct VMin4 : binary_function<uint, uint, uint> // binary functor that forwards to vmin4()
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const
+        {
+            return vmin4(a, b);
+        }
+        // minMat_v4() creates this functor in host code, so the constructors are host+device.
+        __host__ __device__ __forceinline__ VMin4() {}
+        __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
+    };
+
+    struct VMin2 : binary_function<uint, uint, uint> // binary functor that forwards to vmin2()
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const
+        {
+            return vmin2(a, b);
+        }
+        // minMat_v2() creates this functor in host code, so the constructors are host+device.
+        __host__ __device__ __forceinline__ VMin2() {}
+        __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{ // TransformFunctorTraits specializations for the min functors, each derived from arithm::ArithmFuncTraits.
+    template <> struct TransformFunctorTraits< arithm::VMin4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
+    {
+    };
+    // VMin2 uses the same pair of size arguments as VMin4.
+    template <> struct TransformFunctorTraits< arithm::VMin2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
+    {
+    };
+    // Generic minimum<T>: both size arguments are sizeof(T).
+    template <typename T> struct TransformFunctorTraits< minimum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+    // minimum<T> wrapped in binder2nd (presumably what bind2nd() yields in minScalar -- confirm in functional.hpp).
+    template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+}}}
+
+namespace arithm
+{ // Host-side entry points: each one forwards to cudev::transform with WithOutMask() on the given stream.
+    void minMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VMin4(), WithOutMask(), stream);
+    }
+    // Same as minMat_v4 but applies the VMin2 functor.
+    void minMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VMin2(), WithOutMask(), stream);
+    }
+    // Generic path: casts the byte-typed arguments to PtrStepSz<T> and applies minimum<T> to both sources.
+    template <typename T> void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, minimum<T>(), WithOutMask(), stream);
+    }
+    // Explicit instantiations of minMat, one per element type.
+    template void minMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minMat<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    // Single-source variant: the scalar src2 is bound as the second operand of minimum<T> via bind2nd.
+    template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
+    }
+    // Explicit instantiations of minScalar, one per element type.
+    template void minScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minScalar<int   >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void minScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// max
+
+namespace arithm
+{
+    struct VMax4 : binary_function<uint, uint, uint> // binary functor that forwards to vmax4()
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const
+        {
+            return vmax4(a, b);
+        }
+        // maxMat_v4() creates this functor in host code, so the constructors are host+device.
+        __host__ __device__ __forceinline__ VMax4() {}
+        __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
+    };
+
+    struct VMax2 : binary_function<uint, uint, uint> // binary functor that forwards to vmax2()
+    {
+        __device__ __forceinline__ uint operator ()(uint a, uint b) const
+        {
+            return vmax2(a, b);
+        }
+        // maxMat_v2() creates this functor in host code, so the constructors are host+device.
+        __host__ __device__ __forceinline__ VMax2() {}
+        __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{ // TransformFunctorTraits specializations for the max functors, each derived from arithm::ArithmFuncTraits.
+    template <> struct TransformFunctorTraits< arithm::VMax4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
+    {
+    };
+    // VMax2 uses the same pair of size arguments as VMax4.
+    template <> struct TransformFunctorTraits< arithm::VMax2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
+    {
+    };
+    // Generic maximum<T>: both size arguments are sizeof(T).
+    template <typename T> struct TransformFunctorTraits< maximum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+    // maximum<T> wrapped in binder2nd (presumably what bind2nd() yields in maxScalar -- confirm in functional.hpp).
+    template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+}}}
+
+namespace arithm
+{ // Host-side entry points: each one forwards to cudev::transform with WithOutMask() on the given stream.
+    void maxMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VMax4(), WithOutMask(), stream);
+    }
+    // Same as maxMat_v4 but applies the VMax2 functor.
+    void maxMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VMax2(), WithOutMask(), stream);
+    }
+    // Generic path: casts the byte-typed arguments to PtrStepSz<T> and applies maximum<T> to both sources.
+    template <typename T> void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, maximum<T>(), WithOutMask(), stream);
+    }
+    // Explicit instantiations of maxMat, one per element type.
+    template void maxMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxMat<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
+    // Single-source variant: the scalar src2 is bound as the second operand of maximum<T> via bind2nd.
+    template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
+    {
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
+    }
+    // Explicit instantiations of maxScalar, one per element type.
+    template void maxScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxScalar<int   >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+    template void maxScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/minmaxloc.cu b/modules/gpuarithm/src/cuda/minmaxloc.cu
new file mode 100644
index 000000000..08c594d3a
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/minmaxloc.cu
@@ -0,0 +1,235 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/reduce.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+#include "opencv2/core/cuda/limits.hpp"
+#include "opencv2/core/cuda/utility.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace minMaxLoc
+{
+    // Maps each source element type to the type used for the per-thread and shared-memory
+    // accumulators: at least 32 bits wide, to avoid shared memory bank conflicts.
+    template <typename T> struct MinMaxTypeTraits;
+    template <> struct MinMaxTypeTraits<unsigned char> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<signed char> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<unsigned short> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
+    template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
+
+    template <int BLOCK_SIZE, typename T, class Mask>
+    __global__ void kernel_pass_1(const PtrStepSz<T> src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight)
+    { // First pass: every block writes one (min, max, minloc, maxloc) entry at its linear block index.
+        typedef typename MinMaxTypeTraits<T>::best_type work_type;
+        // Shared scratch arrays (BLOCK_SIZE entries each) handed to reduceKeyVal below.
+        __shared__ work_type sminval[BLOCK_SIZE];
+        __shared__ work_type smaxval[BLOCK_SIZE];
+        __shared__ unsigned int sminloc[BLOCK_SIZE];
+        __shared__ unsigned int smaxloc[BLOCK_SIZE];
+        // First element visited by this thread; a block spans blockDim.x * twidth by blockDim.y * theight elements.
+        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
+        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
+        // Linear thread index within the block and linear block index within the grid.
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
+        // Start from the extremes so that any smaller / larger unmasked element replaces them.
+        work_type mymin = numeric_limits<work_type>::max();
+        work_type mymax = -numeric_limits<work_type>::max();
+        unsigned int myminloc = 0;
+        unsigned int mymaxloc = 0;
+        // Visit at most theight x twidth elements, stepping by the block dimensions and staying inside src.
+        for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
+        {
+            const T* ptr = src.ptr(y);
+
+            for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
+            {
+                if (mask(y, x))
+                {
+                    const work_type srcVal = ptr[x];
+                    // Strict comparisons: on ties this thread keeps the location it saw first.
+                    if (srcVal < mymin)
+                    {
+                        mymin = srcVal;
+                        myminloc = y * src.cols + x; // linear index; run() splits it back into x and y
+                    }
+
+                    if (srcVal > mymax)
+                    {
+                        mymax = srcVal;
+                        mymaxloc = y * src.cols + x; // linear index; run() splits it back into x and y
+                    }
+                }
+            }
+        }
+        // Block-level reduction with values as keys (less / greater) and locations as the attached values.
+        reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
+                                 smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
+                                 tid,
+                                 thrust::make_tuple(less<work_type>(), greater<work_type>()));
+        // Thread 0 stores the block's result, converted back to the source type T.
+        if (tid == 0)
+        {
+            minval[bid] = (T) mymin;
+            maxval[bid] = (T) mymax;
+            minloc[bid] = myminloc;
+            maxloc[bid] = mymaxloc;
+        }
+    }
+    template <int BLOCK_SIZE, typename T>
+    __global__ void kernel_pass_2(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int count)
+    { // Second pass: folds the first 'count' entries of each array in place; results end up at index 0.
+        typedef typename MinMaxTypeTraits<T>::best_type work_type;
+        // Shared scratch arrays (BLOCK_SIZE entries each) handed to reduceKeyVal below.
+        __shared__ work_type sminval[BLOCK_SIZE];
+        __shared__ work_type smaxval[BLOCK_SIZE];
+        __shared__ unsigned int sminloc[BLOCK_SIZE];
+        __shared__ unsigned int smaxloc[BLOCK_SIZE];
+        // Threads with index >= count re-read the last valid entry instead of reading past it.
+        unsigned int idx = ::min(threadIdx.x, count - 1);
+
+        work_type mymin = minval[idx];
+        work_type mymax = maxval[idx];
+        unsigned int myminloc = minloc[idx];
+        unsigned int mymaxloc = maxloc[idx];
+        // Same key/value reduction as in kernel_pass_1, indexed by threadIdx.x only.
+        reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
+                                 smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
+                                 threadIdx.x,
+                                 thrust::make_tuple(less<work_type>(), greater<work_type>()));
+        // Thread 0 overwrites element 0 with the final values, which run() copies back to the host.
+        if (threadIdx.x == 0)
+        {
+            minval[0] = (T) mymin;
+            maxval[0] = (T) mymax;
+            minloc[0] = myminloc;
+            maxloc[0] = mymaxloc;
+        }
+    }
+
+    const int threads_x = 32;
+    const int threads_y = 8;
+    // Fills 'block' with threads_x x threads_y threads and 'grid' with the pass-1 grid for a cols x rows image.
+    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
+    {
+        block = dim3(threads_x, threads_y);
+        // Initial estimate: one block per (block.x * block.y) columns and rows, rounded up.
+        grid = dim3(divUp(cols, block.x * block.y),
+                    divUp(rows, block.y * block.x));
+        // Cap the grid so grid.x * grid.y <= threads_x * threads_y, the thread count run() uses for pass 2.
+        grid.x = ::min(grid.x, block.x);
+        grid.y = ::min(grid.y, block.y);
+    }
+
+    void getBufSize(int cols, int rows, size_t elem_size, int& b1cols, int& b1rows, int& b2cols, int& b2rows)
+    {
+        dim3 block, grid;
+        getLaunchCfg(cols, rows, block, grid);
+        const size_t blocks = (size_t) grid.x * grid.y; // pass 1 writes one entry per launched block
+        // Value buffer: row 0 for minima, row 1 for maxima; the width is entries * elem_size.
+        b1cols = (int)(blocks * elem_size);
+        b1rows = 2;
+
+        // Location buffer: same two-row layout; the kernels store unsigned int linear indices in it.
+        b2cols = (int)(blocks * sizeof(unsigned int));
+        b2rows = 2;
+    }
+
+    template <typename T>
+    void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf)
+    {
+        dim3 block, grid;
+        getLaunchCfg(src.cols, src.rows, block, grid);
+        // Elements each thread visits along x and y so that the (capped) grid covers the whole image.
+        const int twidth = divUp(divUp(src.cols, grid.x), block.x);
+        const int theight = divUp(divUp(src.rows, grid.y), block.y);
+        // Row 0 of each scratch buffer receives the minima, row 1 the maxima.
+        T* minval_buf = (T*) valbuf.ptr(0);
+        T* maxval_buf = (T*) valbuf.ptr(1);
+        unsigned int* minloc_buf = locbuf.ptr(0);
+        unsigned int* maxloc_buf = locbuf.ptr(1);
+        // Pass 1: one partial result per block; a null mask pointer selects the WithOutMask functor.
+        if (mask.data)
+            kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
+        else
+            kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
+
+        cudaSafeCall( cudaGetLastError() );
+        // Pass 2: a single block folds the grid.x * grid.y partial results into element 0.
+        kernel_pass_2<threads_x * threads_y><<<1, threads_x * threads_y>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall( cudaDeviceSynchronize() );
+        // Copy the final value pair to the host and hand it to the caller as doubles.
+        T minval_, maxval_;
+        cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+        *minval = minval_;
+        *maxval = maxval_;
+        // Locations are linear indices (y * cols + x): element [1] receives the row, element [0] the column.
+        unsigned int minloc_, maxloc_;
+        cudaSafeCall( cudaMemcpy(&minloc_, minloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&maxloc_, maxloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+        minloc[1] = minloc_ / src.cols; minloc[0] = minloc_ - minloc[1] * src.cols;
+        maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
+    }
+    // Explicit instantiations of run, one per element type.
+    template void run<unsigned char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<signed char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<unsigned short>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<int   >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/mul_mat.cu b/modules/gpuarithm/src/cuda/mul_mat.cu
new file mode 100644
index 000000000..cde44ba56
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/mul_mat.cu
@@ -0,0 +1,211 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    struct Mul_8uc4_32f : binary_function<uint, float, uint>
+    {
+        __device__ __forceinline__ uint operator ()(uint a, float b) const
+        {
+            uint res = 0;
+            // Multiply each of the four 8-bit lanes of 'a' by b, saturate to uchar, and repack in place.
+            res |= (saturate_cast<uchar>((0xffu & (a      )) * b)      );
+            res |= (saturate_cast<uchar>((0xffu & (a >>  8)) * b) <<  8);
+            res |= (saturate_cast<uchar>((0xffu & (a >> 16)) * b) << 16);
+            res |= (saturate_cast<uchar>((0xffu & (a >> 24)) * b) << 24);
+
+            return res;
+        }
+        // mulMat_8uc4_32f() creates this functor in host code, so the constructors are host+device.
+        __host__ __device__ __forceinline__ Mul_8uc4_32f() {}
+        __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
+    };
+
+    struct Mul_16sc4_32f : binary_function<short4, float, short4>
+    {
+        __device__ __forceinline__ short4 operator ()(short4 a, float b) const
+        {
+            return make_short4(saturate_cast<short>(a.x * b), saturate_cast<short>(a.y * b),
+                               saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
+        }
+        // mulMat_16sc4_32f() creates this functor in host code, so the constructors are host+device.
+        __host__ __device__ __forceinline__ Mul_16sc4_32f() {}
+        __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
+    };
+
+    template <typename T, typename D> struct Mul : binary_function<T, T, D>
+    {
+        __device__ __forceinline__ D operator ()(T a, T b) const
+        {
+            return saturate_cast<D>(a * b);
+        }
+        // mulMat() default-constructs this functor in host code, so the constructors are host+device.
+        __host__ __device__ __forceinline__ Mul() {}
+        __host__ __device__ __forceinline__ Mul(const Mul&) {}
+    };
+
+    template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
+    {
+        S scale;
+        // Host-only constructor: mulMat() builds the functor with the scale converted to S.
+        explicit MulScale(S scale_) : scale(scale_) {}
+
+        __device__ __forceinline__ D operator ()(T a, T b) const
+        {
+            return saturate_cast<D>(scale * a * b);
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{ // TransformFunctorTraits specializations for the multiplication functors, derived from arithm::ArithmFuncTraits.
+    template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
+    {
+    };
+    // Mul<T, D>: size arguments are the source element size and the destination element size.
+    template <typename T, typename D> struct TransformFunctorTraits< arithm::Mul<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
+    {
+    };
+    // MulScale<T, S, D>: same size arguments as Mul; the scale type S does not enter.
+    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
+    {
+    };
+}}}
+
+namespace arithm
+{ // Host-side entry points: each one forwards to cudev::transform with WithOutMask() on the given stream.
+    void mulMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream);
+    }
+    // Same pattern for short4 elements of src1 multiplied by the float elements of src2.
+    void mulMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream);
+    }
+    // Generic multiply: T is the source element type, S the type the scale is converted to, D the destination type.
+    template <typename T, typename S, typename D>
+    void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream)
+    {
+        if (scale == 1) // unit scale: use the functor that does not multiply by scale
+        {
+            Mul<T, D> op;
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+        }
+        else
+        {
+            MulScale<T, S, D> op(static_cast<S>(scale));
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+        }
+    }
+    // Explicit instantiations as <T, S, D>; the combinations that are commented out are not instantiated.
+    template void mulMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    template void mulMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void mulMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void mulMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void mulMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void mulMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+
+    //template void mulMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    //template void mulMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+    template void mulMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/mul_scalar.cu b/modules/gpuarithm/src/cuda/mul_scalar.cu
new file mode 100644
index 000000000..208bfc622
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/mul_scalar.cu
@@ -0,0 +1,144 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Unary functor: multiplies a source element of type T by a fixed factor of
+    // type S and converts the product to D with saturate_cast.
+    template <typename T, typename S, typename D> struct MulScalar : unary_function<T, D>
+    {
+        S val; // multiplier captured at construction
+        explicit MulScalar(S factor) : val(factor) {}
+        __device__ __forceinline__ D operator ()(T src) const
+        {
+            return saturate_cast<D>(src * val);
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for MulScalar are inherited from ArithmFuncTraits<sizeof(T), sizeof(D)>.
+    template <typename T, typename S, typename D>
+    struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> {};
+}}}
+
+namespace arithm
+{
+    // Host launcher: applies MulScalar<T, S, D>(S(val)) to every element of src1 via cudev::transform, unmasked.
+    template <typename T, typename S, typename D> void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
+    {
+        MulScalar<T, S, D> scaleOp(static_cast<S>(val));
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, scaleOp, WithOutMask(), stream);
+    }
+    // Explicit instantiations <T, S, D>; combinations that are commented out are not built.
+    template void mulScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    template void mulScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void mulScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void mulScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void mulScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void mulScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+
+    //template void mulScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    //template void mulScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+    template void mulScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpuarithm/src/cuda/polar_cart.cu
similarity index 100%
rename from modules/gpu/src/cuda/mathfunc.cu
rename to modules/gpuarithm/src/cuda/polar_cart.cu
diff --git a/modules/gpuarithm/src/cuda/reduce.cu b/modules/gpuarithm/src/cuda/reduce.cu
new file mode 100644
index 000000000..f2056b97b
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/reduce.cu
@@ -0,0 +1,330 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/reduce.hpp"
+#include "opencv2/core/cuda/limits.hpp"
+
+#include "unroll_detail.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace reduce
+{
+    struct Sum
+    {
+        __device__ __forceinline__ Sum() {}
+        __device__ __forceinline__ Sum(const Sum&) {}
+        // Initial accumulator: VecTraits<T>::all(0).
+        template <typename T>
+        __device__ __forceinline__ T startValue() const
+        {
+            return VecTraits<T>::all(0);
+        }
+        // Folds one more value into the running sum.
+        template <typename T>
+        __device__ __forceinline__ T operator ()(T acc, T item) const
+        {
+            return acc + item;
+        }
+        // Finalization: the second argument is ignored and the sum is returned unchanged.
+        template <typename T>
+        __device__ __forceinline__ T result(T total, double) const
+        {
+            return total;
+        }
+    };
+
+    struct Avg
+    {
+        __device__ __forceinline__ Avg() {}
+        __device__ __forceinline__ Avg(const Avg&) {}
+        // Initial accumulator: VecTraits<T>::all(0).
+        template <typename T>
+        __device__ __forceinline__ T startValue() const
+        {
+            return VecTraits<T>::all(0);
+        }
+        // Accumulates a plain sum; the division happens once, in result().
+        template <typename T>
+        __device__ __forceinline__ T operator ()(T acc, T item) const
+        {
+            return acc + item;
+        }
+        // Finalization: divides the accumulated sum by sz and returns it as a double-based type.
+        template <typename T>
+        __device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T total, double sz) const
+        {
+            return total / sz;
+        }
+    };
+
+    struct Min
+    {
+        __device__ __forceinline__ Min() {}
+        __device__ __forceinline__ Min(const Min&) {}
+        // Initial accumulator: VecTraits<T>::all(...) of the element type's numeric_limits max().
+        template <typename T>
+        __device__ __forceinline__ T startValue() const
+        {
+            return VecTraits<T>::all(numeric_limits<typename VecTraits<T>::elem_type>::max());
+        }
+        // Combines two values through the minimum<T> functor.
+        template <typename T>
+        __device__ __forceinline__ T operator ()(T lhs, T rhs) const
+        {
+            minimum<T> pickSmaller;
+            return pickSmaller(lhs, rhs);
+        }
+        // Finalization: the second argument is ignored and the accumulator is returned unchanged.
+        template <typename T>
+        __device__ __forceinline__ T result(T best, double) const
+        {
+            return best;
+        }
+    };
+
+    struct Max
+    {
+        __device__ __forceinline__ Max() {}
+        __device__ __forceinline__ Max(const Max&) {}
+        // Starts from -max(). NOTE(review): for integer work types that is one above the true minimum (assuming max() is the largest value) - confirm.
+        template <typename T>
+        __device__ __forceinline__ T startValue() const
+        {
+            return VecTraits<T>::all(-numeric_limits<typename VecTraits<T>::elem_type>::max());
+        }
+        // Combines two values through the maximum<T> functor.
+        template <typename T>
+        __device__ __forceinline__ T operator ()(T lhs, T rhs) const
+        {
+            maximum<T> pickLarger;
+            return pickLarger(lhs, rhs);
+        }
+        // Finalization: the second argument is ignored and the accumulator is returned unchanged.
+        template <typename T>
+        __device__ __forceinline__ T result(T best, double) const
+        {
+            return best;
+        }
+    };
+
+    ///////////////////////////////////////////////////////////
+
+    // Reduces every column of src over all of its rows; written for a 16x16 thread block that covers 16 columns.
+    template <typename T, typename S, typename D, class Op>
+    __global__ void rowsKernel(const PtrStepSz<T> src, D* dst, const Op op)
+    {
+        __shared__ S smem[16 * 16];
+
+        const int x = blockIdx.x * 16 + threadIdx.x;
+
+        // Each thread folds rows threadIdx.y, threadIdx.y + 16, ... of column x into its own partial.
+        S myVal = op.template startValue<S>();
+        if (x < src.cols)
+        {
+            for (int y = threadIdx.y; y < src.rows; y += 16)
+            {
+                S srcVal = src(y, x);
+                myVal = op(myVal, srcVal);
+            }
+        }
+
+        // Store transposed so that the 16 partials of one column sit next to each other in shared memory.
+        smem[threadIdx.x * 16 + threadIdx.y] = myVal;
+        __syncthreads();
+
+        volatile S* srow = smem + threadIdx.y * 16;
+        myVal = srow[threadIdx.x];
+        cudev::reduce<16>(srow, myVal, threadIdx.x, op);
+
+        if (threadIdx.x == 0)
+            srow[0] = myVal;
+        __syncthreads();
+
+        // Convert with saturate_cast (as colsKernel does) so a narrowing D saturates instead of wrapping.
+        if (threadIdx.y == 0 && x < src.cols)
+            dst[x] = saturate_cast<D>(op.result(smem[threadIdx.x * 16], src.rows));
+    }
+
+    // Launches rowsKernel with 16x16 blocks, one block per 16 source columns.
+    template <typename T, typename S, typename D, class Op>
+    void rowsCaller(PtrStepSz<T> src, D* dst, cudaStream_t stream)
+    {
+        const dim3 threads(16, 16);
+        const dim3 blocks(divUp(src.cols, threads.x));
+        Op reducer;
+        rowsKernel<T, S, D, Op><<<blocks, threads, 0, stream>>>(src, dst, reducer);
+        cudaSafeCall( cudaGetLastError() );
+        // On the default stream, wait for the device before returning.
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    // Picks the launcher by `op` (0 = Sum, 1 = Avg, 2 = Max, 3 = Min); `op` is not range-checked here.
+    template <typename T, typename S, typename D>
+    void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream)
+    {
+        typedef void (*caller_t)(PtrStepSz<T>, D*, cudaStream_t);
+        static const caller_t callers[] =
+        {
+            rowsCaller<T, S, D, Sum>,
+            rowsCaller<T, S, D, Avg>,
+            rowsCaller<T, S, D, Max>,
+            rowsCaller<T, S, D, Min>
+        };
+        callers[op]((PtrStepSz<T>) src, (D*) dst, stream);
+    }
+    // Explicit instantiations <source T, accumulator S, destination D>.
+    template void rows<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<unsigned char, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<unsigned char, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<unsigned char, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+
+    template void rows<unsigned short, int, unsigned short>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<unsigned short, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<unsigned short, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<unsigned short, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+
+    template void rows<short, int, short>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<short, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<short, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<short, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+
+    template void rows<int, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<int, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<int, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+
+    template void rows<float, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+    template void rows<float, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+
+    template void rows<double, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
+
+    ///////////////////////////////////////////////////////////
+
+    // Reduces one source row across its columns: block blockIdx.x handles row blockIdx.x and writes dst[blockIdx.x].
+    template <int BLOCK_SIZE, typename T, typename S, typename D, int cn, class Op>
+    __global__ void colsKernel(const PtrStepSz<typename TypeVec<T, cn>::vec_type> src, typename TypeVec<D, cn>::vec_type* dst, const Op op)
+    {
+        typedef typename TypeVec<T, cn>::vec_type src_type;
+        typedef typename TypeVec<S, cn>::vec_type work_type;
+        typedef typename TypeVec<D, cn>::vec_type dst_type;
+
+        __shared__ S smem[BLOCK_SIZE * cn];
+
+        const int row = blockIdx.x;
+        const src_type* rowPtr = src.ptr(row);
+
+        // Thread t accumulates columns t, t + BLOCK_SIZE, ... after converting them to the work type.
+        work_type acc = op.template startValue<work_type>();
+        for (int col = threadIdx.x; col < src.cols; col += BLOCK_SIZE)
+            acc = op(acc, saturate_cast<work_type>(rowPtr[col]));
+
+        cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(acc), threadIdx.x, detail::Unroll<cn>::op(op));
+
+        if (threadIdx.x == 0)
+            dst[row] = saturate_cast<dst_type>(op.result(acc, src.cols));
+    }
+
+    // Launches colsKernel with one 256-thread block per source row.
+    template <typename T, typename S, typename D, int cn, class Op> void colsCaller(PtrStepSzb src, void* dst, cudaStream_t stream)
+    {
+        typedef typename TypeVec<T, cn>::vec_type src_vec_t;
+        typedef typename TypeVec<D, cn>::vec_type dst_vec_t;
+        const int BLOCK_SIZE = 256;
+
+        const dim3 threads(BLOCK_SIZE);
+        const dim3 blocks(src.rows);
+        Op reducer;
+        colsKernel<BLOCK_SIZE, T, S, D, cn, Op><<<blocks, threads, 0, stream>>>((PtrStepSz<src_vec_t>) src, (dst_vec_t*) dst, reducer);
+        cudaSafeCall( cudaGetLastError() );
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    // Picks the launcher by channel count `cn` (1..4) and `op` (0 = Sum, 1 = Avg, 2 = Max, 3 = Min); neither is checked here.
+    template <typename T, typename S, typename D> void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream)
+    {
+        typedef void (*caller_t)(PtrStepSzb, void*, cudaStream_t);
+        static const caller_t table[5][4] =
+        {
+            {0, 0, 0, 0}, // cn == 0 has no launcher
+            {colsCaller<T, S, D, 1, Sum>, colsCaller<T, S, D, 1, Avg>, colsCaller<T, S, D, 1, Max>, colsCaller<T, S, D, 1, Min>},
+            {colsCaller<T, S, D, 2, Sum>, colsCaller<T, S, D, 2, Avg>, colsCaller<T, S, D, 2, Max>, colsCaller<T, S, D, 2, Min>},
+            {colsCaller<T, S, D, 3, Sum>, colsCaller<T, S, D, 3, Avg>, colsCaller<T, S, D, 3, Max>, colsCaller<T, S, D, 3, Min>},
+            {colsCaller<T, S, D, 4, Sum>, colsCaller<T, S, D, 4, Avg>, colsCaller<T, S, D, 4, Max>, colsCaller<T, S, D, 4, Min>},
+        };
+        table[cn][op](src, dst, stream);
+    }
+    // Explicit instantiations <source T, accumulator S, destination D>.
+    template void cols<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<unsigned char, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<unsigned char, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<unsigned char, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+
+    template void cols<unsigned short, int, unsigned short>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<unsigned short, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<unsigned short, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<unsigned short, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+
+    template void cols<short, int, short>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<short, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<short, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<short, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+
+    template void cols<int, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<int, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<int, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+
+    template void cols<float, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+    template void cols<float, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+
+    template void cols<double, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
+}
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/cuda/split_merge.cu b/modules/gpuarithm/src/cuda/split_merge.cu
similarity index 100%
rename from modules/gpu/src/cuda/split_merge.cu
rename to modules/gpuarithm/src/cuda/split_merge.cu
diff --git a/modules/gpuarithm/src/cuda/sub_mat.cu b/modules/gpuarithm/src/cuda/sub_mat.cu
new file mode 100644
index 000000000..adbdb2f50
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/sub_mat.cu
@@ -0,0 +1,185 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Binary functor forwarding two uint words to vsub4 (presumably a packed per-byte subtraction - confirm).
+    struct VSub4 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ VSub4() {}
+        __device__ __forceinline__ VSub4(const VSub4&) {}
+        __device__ __forceinline__ uint operator ()(uint lhs, uint rhs) const
+        {
+            return vsub4(lhs, rhs);
+        }
+    };
+
+    // Binary functor forwarding two uint words to vsub2 (presumably a packed per-halfword subtraction - confirm).
+    struct VSub2 : binary_function<uint, uint, uint>
+    {
+        __device__ __forceinline__ VSub2() {}
+        __device__ __forceinline__ VSub2(const VSub2&) {}
+        __device__ __forceinline__ uint operator ()(uint lhs, uint rhs) const
+        {
+            return vsub2(lhs, rhs);
+        }
+    };
+
+    // Generic element-wise subtraction: computes lhs - rhs in T's arithmetic and converts to D with saturate_cast.
+    template <typename T, typename D> struct SubMat : binary_function<T, T, D>
+    {
+        __device__ __forceinline__ SubMat() {}
+        __device__ __forceinline__ SubMat(const SubMat&) {}
+        __device__ __forceinline__ D operator ()(T lhs, T rhs) const
+        {
+            return saturate_cast<D>(lhs - rhs);
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    // Transform traits for the subtraction functors are inherited from ArithmFuncTraits,
+    // parameterized by the byte sizes of the source and destination element types
+    // (uint for both packed variants, T and D for the generic one).
+    template <>
+    struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+
+    template <>
+    struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> {};
+
+    template <typename T, typename D>
+    struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> {};
+}}}
+
+namespace arithm
+{
+    // Unmasked subtraction over uint-typed views using the VSub4 functor.
+    void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
+    }
+    // Unmasked subtraction over uint-typed views using the VSub2 functor.
+    void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+    {
+        cudev::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
+    }
+    // Generic subtraction dst = saturate_cast<D>(src1 - src2); the mask is applied only when mask.data is non-null.
+    template <typename T, typename D> void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    {
+        if (!mask.data)
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
+        else
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
+    }
+    // Explicit instantiations <T, D>; combinations that are commented out are not built.
+    template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    template void subMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/sub_scalar.cu b/modules/gpuarithm/src/cuda/sub_scalar.cu
new file mode 100644
index 000000000..ed1c96e00
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/sub_scalar.cu
@@ -0,0 +1,148 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    // Unary device functor for "matrix minus scalar".
+    // T: source element type, S: type the scalar is held in, D: destination element type.
+    template <typename T, typename S, typename D> struct SubScalar : unary_function<T, D>
+    {
+        S val; // the scalar that is subtracted from every element
+
+        explicit SubScalar(S scalar) : val(scalar) {}
+
+        // Subtract, then saturate to D.
+        __device__ __forceinline__ D operator ()(T x) const { return saturate_cast<D>(x - val); }
+    };
+}
+
+namespace cv { namespace gpu { namespace cudev
+{   // Transform traits for arithm::SubScalar, inherited from arithm::ArithmFuncTraits keyed on the source and destination element sizes.
+    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::SubScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
+    {   // adds no members of its own
+    };
+}}}
+
+namespace arithm
+{
+    // Computes dst = saturate_cast<D>(src1 - S(val)) element-wise; a non-null mask is forwarded to transform.
+    template <typename T, typename S, typename D>
+    void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    {
+        SubScalar<T, S, D> functor(static_cast<S>(val));
+        PtrStepSz<T> typedSrc = (PtrStepSz<T>) src1;
+        PtrStepSz<D> typedDst = (PtrStepSz<D>) dst;
+        if (!mask.data) cudev::transform(typedSrc, typedDst, functor, WithOutMask(), stream);
+        else            cudev::transform(typedSrc, typedDst, functor, mask, stream);
+    }
+    // Explicit instantiations as <source, scalar, destination>; the commented-out combinations are not compiled.
+    template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    template void subScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+
+    //template void subScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    //template void subScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void subScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/sum.cu b/modules/gpuarithm/src/cuda/sum.cu
new file mode 100644
index 000000000..3838a7b6c
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/sum.cu
@@ -0,0 +1,380 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/reduce.hpp"
+#include "opencv2/core/cuda/emulation.hpp"
+#include "opencv2/core/cuda/utility.hpp"
+
+#include "unroll_detail.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace sum
+{
+    __device__ unsigned int blocks_finished = 0; // ticket counter: incremented once per block and reset to 0 by GlobalReduce on the __CUDA_ARCH__ < 200 path
+
+    // Adds a cn-channel value to cn consecutive elements starting at the destination pointer,
+    // issuing one Emulation::glob::atomicAdd per channel (x, y, z, w in that order).
+    template <typename R, int cn> struct AtomicAdd;
+
+    template <typename R> struct AtomicAdd<R, 1>
+    {
+        static __device__ void run(R* dst, R v) { Emulation::glob::atomicAdd(dst, v); }
+    };
+    template <typename R> struct AtomicAdd<R, 2>
+    {
+        typedef typename TypeVec<R, 2>::vec_type val_type;
+
+        static __device__ void run(R* dst, val_type v)
+        {
+            Emulation::glob::atomicAdd(dst, v.x);
+            Emulation::glob::atomicAdd(dst + 1, v.y);
+        }
+    };
+    template <typename R> struct AtomicAdd<R, 3>
+    {
+        typedef typename TypeVec<R, 3>::vec_type val_type;
+
+        static __device__ void run(R* dst, val_type v)
+        {
+            Emulation::glob::atomicAdd(dst, v.x);
+            Emulation::glob::atomicAdd(dst + 1, v.y);
+            Emulation::glob::atomicAdd(dst + 2, v.z);
+        }
+    };
+    template <typename R> struct AtomicAdd<R, 4>
+    {
+        typedef typename TypeVec<R, 4>::vec_type val_type;
+
+        static __device__ void run(R* dst, val_type v)
+        {
+            Emulation::glob::atomicAdd(dst, v.x);
+            Emulation::glob::atomicAdd(dst + 1, v.y);
+            Emulation::glob::atomicAdd(dst + 2, v.z);
+            Emulation::glob::atomicAdd(dst + 3, v.w);
+        }
+    };
+
+    template <int BLOCK_SIZE, typename R, int cn>
+    struct GlobalReduce // folds each block's partial sum into the final value at result[0]
+    {
+        typedef typename TypeVec<R, cn>::vec_type result_type;
+        // sum: the block's reduced value (only thread 0's copy is stored); smem: scratch for the fallback reduction.
+        static __device__ void run(result_type& sum, result_type* result, int tid, int bid, R* smem)
+        {
+        #if __CUDA_ARCH__ >= 200
+            if (tid == 0) // one atomic update per block
+                AtomicAdd<R, cn>::run((R*) result, sum); // NOTE(review): adds onto whatever result already holds - presumably zeroed by the host beforehand; confirm
+        #else
+            __shared__ bool is_last;
+
+            if (tid == 0)
+            {
+                result[bid] = sum; // publish this block's partial sum in its own slot
+
+                __threadfence(); // order the store above before the ticket increment below
+
+                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
+                is_last = (ticket == gridDim.x * gridDim.y - 1); // true only in the block that draws the final ticket
+            }
+
+            __syncthreads(); // make is_last visible to every thread of the block
+
+            if (is_last)
+            {
+                sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0); // thread i loads block i's partial; NOTE(review): assumes the grid has no more blocks than a block has threads
+
+                cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
+
+                if (tid == 0)
+                {
+                    result[0] = sum;
+                    blocks_finished = 0; // put the ticket counter back to 0
+                }
+            }
+        #endif
+        }
+    };
+
+    template <int BLOCK_SIZE, typename src_type, typename result_type, class Mask, class Op>
+    __global__ void kernel(const PtrStepSz<src_type> src, result_type* result, const Mask mask, const Op op, const int twidth, const int theight)
+    {   // Each thread accumulates op(element) over its unmasked pixels, the block reduces those sums, then GlobalReduce merges the blocks.
+        typedef typename VecTraits<src_type>::elem_type T;
+        typedef typename VecTraits<result_type>::elem_type R;
+        const int cn = VecTraits<src_type>::cn;
+
+        __shared__ R smem[BLOCK_SIZE * cn]; // scratch handed to both reductions below
+
+        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x; // first column visited by this thread
+        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y; // first row visited by this thread
+
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x; // linear thread index inside the block
+        const int bid = blockIdx.y * gridDim.x + blockIdx.x; // linear block index inside the grid
+
+        result_type sum = VecTraits<result_type>::all(0);
+
+        for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y) // at most theight rows, blockDim.y apart
+        {
+            const src_type* ptr = src.ptr(y);
+
+            for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x) // at most twidth columns, blockDim.x apart
+            {
+                if (mask(y, x)) // pixels rejected by the mask contribute nothing
+                {
+                    const src_type srcVal = ptr[x];
+                    sum = sum + op(saturate_cast<result_type>(srcVal)); // convert to the accumulator type first, then apply op
+                }
+            }
+        }
+
+        cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
+
+        GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem);
+    }
+
+    const int threads_x = 32; // block width in threads
+    const int threads_y = 8;  // block height in threads
+
+    // Chooses a threads_x x threads_y block and a grid that is clamped to at most
+    // block.x by block.y blocks.
+    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
+    {
+        block = dim3(threads_x, threads_y);
+        const unsigned int blockThreads = block.x * block.y;
+        grid = dim3(divUp(cols, blockThreads), divUp(rows, blockThreads));
+        if (grid.x > block.x) grid.x = block.x;
+        if (grid.y > block.y) grid.y = block.y;
+    }
+
+    // Reports a one-row buffer holding sizeof(double) * cn bytes for every block of the launch grid.
+    void getBufSize(int cols, int rows, int cn, int& bufcols, int& bufrows)
+    {
+        dim3 blockDims, gridDims;
+        getLaunchCfg(cols, rows, blockDims, gridDims);
+        bufcols = gridDims.x * gridDims.y * sizeof(double) * cn;
+        bufrows = 1;
+    }
+
+    template <typename T, typename R, int cn, template <typename> class Op>
+    void caller(PtrStepSzb src_, void* buf_, double* out, PtrStepSzb mask)
+    {   // Launches the sum kernel over src_, waits for it, then copies the result from the start of buf_ into out[0..3].
+        typedef typename TypeVec<T, cn>::vec_type src_type;
+        typedef typename TypeVec<R, cn>::vec_type result_type;
+
+        PtrStepSz<src_type> src(src_);
+        result_type* buf = (result_type*) buf_; // device scratch buffer; NOTE(review): presumably sized via getBufSize and zeroed by the host caller - confirm
+
+        dim3 block, grid;
+        getLaunchCfg(src.cols, src.rows, block, grid);
+
+        const int twidth = divUp(divUp(src.cols, grid.x), block.x); // columns each thread has to cover
+        const int theight = divUp(divUp(src.rows, grid.y), block.y); // rows each thread has to cover
+
+        Op<result_type> op;
+
+        if (mask.data) // a null mask pointer selects the unmasked launch
+            kernel<threads_x * threads_y><<<grid, block>>>(src, buf, SingleMask(mask), op, twidth, theight);
+        else
+            kernel<threads_x * threads_y><<<grid, block>>>(src, buf, WithOutMask(), op, twidth, theight);
+        cudaSafeCall( cudaGetLastError() );
+
+        cudaSafeCall( cudaDeviceSynchronize() ); // launched without a stream argument; block until the kernel has finished
+
+        R result[4] = {0, 0, 0, 0}; // entries the copy below does not overwrite stay 0
+        cudaSafeCall( cudaMemcpy(&result, buf, sizeof(result_type), cudaMemcpyDeviceToHost) ); // a single result_type is read from the start of buf
+
+        out[0] = result[0];
+        out[1] = result[1];
+        out[2] = result[2];
+        out[3] = result[3];
+    }
+
+    template <typename T> struct SumType; // maps a source element type T to the accumulator type R used by run and runAbs
+    template <> struct SumType<uchar> { typedef unsigned int R; };
+    template <> struct SumType<schar> { typedef int R; };
+    template <> struct SumType<ushort> { typedef unsigned int R; };
+    template <> struct SumType<short> { typedef int R; };
+    template <> struct SumType<int> { typedef int R; };
+    template <> struct SumType<float> { typedef float R; };
+    template <> struct SumType<double> { typedef double R; };
+
+    // Plain sum: elements pass through identity and are accumulated in the SumType<T> accumulator type.
+    template <typename T, int cn>
+    void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
+    {
+        caller<T, typename SumType<T>::R, cn, identity>(src, buf, out, mask);
+    }
+
+    template void run<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void run<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void run<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void run<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void run<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void run<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void run<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void run<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    // Sum of absolute values: elements pass through abs_func and are accumulated in the SumType<T> accumulator type.
+    template <typename T, int cn>
+    void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
+    {
+        caller<T, typename SumType<T>::R, cn, abs_func>(src, buf, out, mask);
+    }
+
+    template void runAbs<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runAbs<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runAbs<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runAbs<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runAbs<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runAbs<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runAbs<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runAbs<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    // Device functor returning the square of its argument.
+    template <typename T> struct Sqr : unary_function<T, T>
+    {
+        __device__ __forceinline__ T operator ()(T v) const { return v * v; }
+    };
+
+    // Sum of squares: elements pass through Sqr and are always accumulated in double,
+    // whatever the source element type T is.
+    template <typename T, int cn>
+    void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
+    {
+        caller<T, double, cn, Sqr>(src, buf, out, mask);
+    }
+
+    template void runSqr<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runSqr<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runSqr<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runSqr<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runSqr<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runSqr<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+
+    template void runSqr<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+    template void runSqr<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/threshold.cu b/modules/gpuarithm/src/cuda/threshold.cu
new file mode 100644
index 000000000..73ce8cee7
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/threshold.cu
@@ -0,0 +1,114 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/simd_functions.hpp"
+
+#include "arithm_func_traits.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace cv { namespace gpu { namespace cudev
+{   // Transform traits for the five threshold functors: each inherits arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> and adds no members.
+    template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+
+    template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+
+    template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+
+    template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+
+    template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
+    {
+    };
+}}}
+
+namespace arithm
+{
+    // Builds the functor Op<T>(thresh, maxVal) and applies it to src without a mask.
+    template <template <typename> class Op, typename T>
+    void threshold_caller(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream)
+    {
+        cudev::transform(src, dst, Op<T>(thresh, maxVal), WithOutMask(), stream);
+    }
+
+    // Dispatches on `type`, which indexes the five-entry table below and is not range-checked here.
+    template <typename T>
+    void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream)
+    {
+        typedef void (*func_t)(PtrStepSz<T>, PtrStepSz<T>, T, T, cudaStream_t);
+        static const func_t funcs[] =
+        {
+            threshold_caller<thresh_binary_func, T>,
+            threshold_caller<thresh_binary_inv_func, T>,
+            threshold_caller<thresh_trunc_func, T>,
+            threshold_caller<thresh_to_zero_func, T>,
+            threshold_caller<thresh_to_zero_inv_func, T>
+        };
+
+        const func_t func = funcs[type];
+        func((PtrStepSz<T>) src, (PtrStepSz<T>) dst, static_cast<T>(thresh), static_cast<T>(maxVal), stream);
+    }
+
+    template void threshold<uchar>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+    template void threshold<schar>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+    template void threshold<ushort>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+    template void threshold<short>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+    template void threshold<int>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+    template void threshold<float>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+    template void threshold<double>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/transpose.cu b/modules/gpuarithm/src/cuda/transpose.cu
new file mode 100644
index 000000000..b51dc201a
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/transpose.cu
@@ -0,0 +1,122 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace arithm
+{
+    const int TRANSPOSE_TILE_DIM   = 16;
+    const int TRANSPOSE_BLOCK_ROWS = 16; // equal to TRANSPOSE_TILE_DIM, so the per-thread loops below run exactly once
+    // Tiled transpose: each block stages one tile of src in shared memory, then writes it transposed into dst.
+    template <typename T>
+    __global__ void transposeKernel(const PtrStepSz<T> src, PtrStep<T> dst)
+    {
+        __shared__ T tile[TRANSPOSE_TILE_DIM][TRANSPOSE_TILE_DIM + 1];
+        // The tile has one extra (padding) column; presumably to avoid shared-memory bank conflicts - TODO confirm.
+        int blockIdx_x, blockIdx_y;
+
+        // do diagonal reordering
+        if (gridDim.x == gridDim.y)
+        {
+            blockIdx_y = blockIdx.x;
+            blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x;
+        }
+        else
+        {
+            int bid = blockIdx.x + gridDim.x * blockIdx.y;
+            blockIdx_y = bid % gridDim.y;
+            blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x;
+        }
+        // Phase 1: load src(row, col) into tile[threadIdx.y][threadIdx.x], skipping elements outside src.
+        int xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
+        int yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
+
+        if (xIndex < src.cols)
+        {
+            for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
+            {
+                if (yIndex + i < src.rows)
+                {
+                    tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex);
+                }
+            }
+        }
+        // Barrier: phase 2 reads tile entries that were written by other threads of the block.
+        __syncthreads();
+        // Phase 2: swap the block coordinates and write the tile transposed; dst is indexed as (src column, src row).
+        xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
+        yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
+
+        if (xIndex < src.rows)
+        {
+            for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
+            {
+                if (yIndex + i < src.cols)
+                {
+                    dst(yIndex + i, xIndex) = tile[threadIdx.x][threadIdx.y + i];
+                }
+            }
+        }
+    }
+    // Host launcher: one TILE_DIM x TILE_DIM thread block per source tile; synchronizes the device only when stream == 0.
+    template <typename T> void transpose(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
+    {
+        const dim3 block(TRANSPOSE_TILE_DIM, TRANSPOSE_TILE_DIM);
+        const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
+
+        transposeKernel<<<grid, block, 0, stream>>>(src, dst);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+    // Only the int and double element types are instantiated in this file.
+    template void transpose<int>(PtrStepSz<int> src, PtrStepSz<int> dst, cudaStream_t stream);
+    template void transpose<double>(PtrStepSz<double> src, PtrStepSz<double> dst, cudaStream_t stream);
+}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpuarithm/src/cuda/unroll_detail.hpp b/modules/gpuarithm/src/cuda/unroll_detail.hpp
new file mode 100644
index 000000000..993b10be5
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/unroll_detail.hpp
@@ -0,0 +1,135 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __UNROLL_DETAIL_HPP__
+#define __UNROLL_DETAIL_HPP__
+
+#include <thrust/tuple.h>
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+
+namespace detail
+{
+    template <int cn> struct Unroll; // Per-channel-count helpers; specialized below for cn = 1, 2, 3 and 4.
+    template <> struct Unroll<1>
+    {
+        template <int BLOCK_SIZE, typename R>
+        static __device__ __forceinline__ volatile R* smem_tuple(R* smem)
+        {
+            return smem;
+        }
+        // Single channel: the value itself is returned, no tuple is built.
+        template <typename R>
+        static __device__ __forceinline__ R& tie(R& val)
+        {
+            return val;
+        }
+        // Single channel: the operator is passed through unchanged.
+        template <class Op>
+        static __device__ __forceinline__ const Op& op(const Op& op)
+        {
+            return op;
+        }
+    };
+    template <> struct Unroll<2> // smem is addressed as 2 consecutive slices of BLOCK_SIZE elements, one per component
+    {
+        template <int BLOCK_SIZE, typename R>
+        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*> smem_tuple(R* smem)
+        {
+            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
+        }
+        // Tuple of references to the components val.x and val.y.
+        template <typename R>
+        static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        {
+            return thrust::tie(val.x, val.y);
+        }
+        // One copy of the operator per component.
+        template <class Op>
+        static __device__ __forceinline__ const thrust::tuple<Op, Op> op(const Op& op)
+        {
+            return thrust::make_tuple(op, op);
+        }
+    };
+    template <> struct Unroll<3> // smem is addressed as 3 consecutive slices of BLOCK_SIZE elements, one per component
+    {
+        template <int BLOCK_SIZE, typename R>
+        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
+        {
+            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
+        }
+        // Tuple of references to the components val.x, val.y and val.z.
+        template <typename R>
+        static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        {
+            return thrust::tie(val.x, val.y, val.z);
+        }
+        // One copy of the operator per component.
+        template <class Op>
+        static __device__ __forceinline__ const thrust::tuple<Op, Op, Op> op(const Op& op)
+        {
+            return thrust::make_tuple(op, op, op);
+        }
+    };
+    template <> struct Unroll<4> // smem is addressed as 4 consecutive slices of BLOCK_SIZE elements, one per component
+    {
+        template <int BLOCK_SIZE, typename R>
+        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
+        {
+            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
+        }
+        // Tuple of references to the components val.x, val.y, val.z and val.w.
+        template <typename R>
+        static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        {
+            return thrust::tie(val.x, val.y, val.z, val.w);
+        }
+        // One copy of the operator per component.
+        template <class Op>
+        static __device__ __forceinline__ const thrust::tuple<Op, Op, Op, Op> op(const Op& op)
+        {
+            return thrust::make_tuple(op, op, op, op);
+        }
+    };
+}
+
+#endif // __UNROLL_DETAIL_HPP__
diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp
similarity index 97%
rename from modules/gpu/src/element_operations.cpp
rename to modules/gpuarithm/src/element_operations.cpp
index 91ebce5f9..f76656019 100644
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpuarithm/src/element_operations.cpp
@@ -78,10 +78,9 @@ void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cud
 void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
-double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_no_cuda(); return 0.0;}
 void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::alphaComp(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::addWeighted(const GpuMat&, double, const GpuMat&, double, double, GpuMat&, int, Stream&) { throw_no_cuda(); }
+double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_no_cuda(); return 0.0;}
 
 #else
 
@@ -1793,10 +1792,10 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream)
 
 namespace arithm
 {
-    void cmpMatEq_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-    void cmpMatNe_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-    void cmpMatLt_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-    void cmpMatLe_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
+    void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
+    void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
+    void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
+    void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
 
     template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
     template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -1820,7 +1819,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
         {cmpMatEq<double>        , cmpMatNe<double>        , cmpMatLt<double>        , cmpMatLe<double>        }
     };
 
-    typedef void (*func_v4_t)(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
+    typedef void (*func_v4_t)(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
     static const func_v4_t funcs_v4[] =
     {
         cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4
@@ -2670,72 +2669,6 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
     funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream));
 }
 
-////////////////////////////////////////////////////////////////////////
-// threshold
-
-namespace arithm
-{
-    template <typename T>
-    void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-}
-
-double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, Stream& s)
-{
-    const int depth = src.depth();
-
-    CV_Assert( src.channels() == 1 && depth <= CV_64F );
-    CV_Assert( type <= THRESH_TOZERO_INV );
-
-    if (depth == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-
-    dst.create(src.size(), src.type());
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    if (src.type() == CV_32FC1 && type == THRESH_TRUNC)
-    {
-        NppStreamHandler h(stream);
-
-        NppiSize sz;
-        sz.width  = src.cols;
-        sz.height = src.rows;
-
-        nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step),
-            dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else
-    {
-        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
-        static const func_t funcs[] =
-        {
-            arithm::threshold<unsigned char>,
-            arithm::threshold<signed char>,
-            arithm::threshold<unsigned short>,
-            arithm::threshold<short>,
-            arithm::threshold<int>,
-            arithm::threshold<float>,
-            arithm::threshold<double>
-        };
-
-        if (depth != CV_32F && depth != CV_64F)
-        {
-            thresh = cvFloor(thresh);
-            maxVal = cvRound(maxVal);
-        }
-
-        funcs[depth](src, dst, thresh, maxVal, type, stream);
-    }
-
-    return thresh;
-}
-
 ////////////////////////////////////////////////////////////////////////
 // pow
 
@@ -2777,79 +2710,6 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
     funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream));
 }
 
-////////////////////////////////////////////////////////////////////////
-// alphaComp
-
-namespace
-{
-    template <int DEPTH> struct NppAlphaCompFunc
-    {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
-
-        typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pSrc2, int nSrc2Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, NppiAlphaOp eAlphaOp);
-    };
-
-    template <int DEPTH, typename NppAlphaCompFunc<DEPTH>::func_t func> struct NppAlphaComp
-    {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
-
-        static void call(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream)
-        {
-            NppStreamHandler h(stream);
-
-            NppiSize oSizeROI;
-            oSizeROI.width = img1.cols;
-            oSizeROI.height = img2.rows;
-
-            nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
-                              dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-}
-
-void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream)
-{
-    static const NppiAlphaOp npp_alpha_ops[] = {
-        NPPI_OP_ALPHA_OVER,
-        NPPI_OP_ALPHA_IN,
-        NPPI_OP_ALPHA_OUT,
-        NPPI_OP_ALPHA_ATOP,
-        NPPI_OP_ALPHA_XOR,
-        NPPI_OP_ALPHA_PLUS,
-        NPPI_OP_ALPHA_OVER_PREMUL,
-        NPPI_OP_ALPHA_IN_PREMUL,
-        NPPI_OP_ALPHA_OUT_PREMUL,
-        NPPI_OP_ALPHA_ATOP_PREMUL,
-        NPPI_OP_ALPHA_XOR_PREMUL,
-        NPPI_OP_ALPHA_PLUS_PREMUL,
-        NPPI_OP_ALPHA_PREMUL
-    };
-
-    typedef void (*func_t)(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream);
-
-    static const func_t funcs[] =
-    {
-        NppAlphaComp<CV_8U, nppiAlphaComp_8u_AC4R>::call,
-        0,
-        NppAlphaComp<CV_16U, nppiAlphaComp_16u_AC4R>::call,
-        0,
-        NppAlphaComp<CV_32S, nppiAlphaComp_32s_AC4R>::call,
-        NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call
-    };
-
-    CV_Assert( img1.type() == CV_8UC4 || img1.type() == CV_16UC4 || img1.type() == CV_32SC4 || img1.type() == CV_32FC4 );
-    CV_Assert( img1.size() == img2.size() && img1.type() == img2.type() );
-
-    dst.create(img1.size(), img1.type());
-
-    const func_t func = funcs[img1.depth()];
-
-    func(img1, img2, dst, npp_alpha_ops[alpha_op], StreamAccessor::getStream(stream));
-}
-
 ////////////////////////////////////////////////////////////////////////
 // addWeighted
 
@@ -3357,4 +3217,70 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2,
     func(src1_, alpha, src2_, beta, gamma, dst_, StreamAccessor::getStream(stream));
 }
 
+////////////////////////////////////////////////////////////////////////
+// threshold
+
+namespace arithm
+{
+    template <typename T>
+    void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+}
+
+double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, Stream& s)
+{   // Thresholds single-channel src into dst (created with src's size and type) on stream s; returns the threshold used.
+    const int depth = src.depth();
+    // `type` is later used as an index into a 5-entry dispatch table, so both bounds are checked (negative values too).
+    CV_Assert( src.channels() == 1 && depth <= CV_64F );
+    CV_Assert( type >= 0 && type <= 4/*THRESH_TOZERO_INV*/ );
+
+    if (depth == CV_64F)
+    {
+        if (!deviceSupports(NATIVE_DOUBLE))
+            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src.size(), src.type());
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+    // 32-bit float truncation goes through NPP; every other depth/type pair uses the arithm::threshold kernels.
+    if (src.type() == CV_32FC1 && type == 2/*THRESH_TRUNC*/)
+    {
+        NppStreamHandler h(stream);
+
+        NppiSize sz;
+        sz.width  = src.cols;
+        sz.height = src.rows;
+
+        nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step),
+            dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+    else
+    {
+        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
+        static const func_t funcs[] =
+        {
+            arithm::threshold<unsigned char>,
+            arithm::threshold<signed char>,
+            arithm::threshold<unsigned short>,
+            arithm::threshold<short>,
+            arithm::threshold<int>,
+            arithm::threshold<float>,
+            arithm::threshold<double>
+        };
+        // Integer depths: the threshold is floored and maxVal is rounded before dispatch.
+        if (depth != CV_32F && depth != CV_64F)
+        {
+            thresh = cvFloor(thresh);
+            maxVal = cvRound(maxVal);
+        }
+
+        funcs[depth](src, dst, thresh, maxVal, type, stream);
+    }
+    // The returned value is the (possibly floored) threshold that was applied.
+    return thresh;
+}
+
 #endif
diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpuarithm/src/matrix_reductions.cpp
similarity index 100%
rename from modules/gpu/src/matrix_reductions.cpp
rename to modules/gpuarithm/src/matrix_reductions.cpp
diff --git a/modules/gpuarithm/src/precomp.cpp b/modules/gpuarithm/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpuarithm/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpuarithm/src/precomp.hpp b/modules/gpuarithm/src/precomp.hpp
new file mode 100644
index 000000000..7d36adb46
--- /dev/null
+++ b/modules/gpuarithm/src/precomp.hpp
@@ -0,0 +1,58 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <limits>
+
+#include "opencv2/gpuarithm.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/core/core_c.h"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#ifdef HAVE_CUBLAS
+    #include <cublas.h>
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/src/split_merge.cpp b/modules/gpuarithm/src/split_merge.cpp
similarity index 100%
rename from modules/gpu/src/split_merge.cpp
rename to modules/gpuarithm/src/split_merge.cpp
diff --git a/modules/gpu/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp
similarity index 100%
rename from modules/gpu/test/test_core.cpp
rename to modules/gpuarithm/test/test_core.cpp
diff --git a/modules/gpuarithm/test/test_main.cpp b/modules/gpuarithm/test/test_main.cpp
new file mode 100644
index 000000000..c37a85cb4
--- /dev/null
+++ b/modules/gpuarithm/test/test_main.cpp
@@ -0,0 +1,120 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+using namespace cvtest;
+using namespace testing;
+
+int main(int argc, char** argv) // Test runner (HAVE_CUDA build): parses options, loads the device(s), runs all tests.
+{
+    try
+    {
+        const std::string keys =
+                "{ h help ?            |      | Print help}"
+                "{ i info              |      | Print information about system and exit }"
+                "{ device              | -1   | Device on which tests will be executed (-1 means all devices) }"
+                ;
+
+        CommandLineParser cmd(argc, (const char**)argv, keys);
+
+        if (cmd.has("help"))
+        {
+            cmd.printMessage();
+            return 0;
+        }
+        // printCudaInfo() runs on every invocation; when `info` was given the program exits right after it.
+        printCudaInfo();
+
+        if (cmd.has("info"))
+        {
+            return 0;
+        }
+        // A negative `device` value (the default is -1) loads all devices; otherwise only the given one is loaded.
+        int device = cmd.get<int>("device");
+        if (device < 0)
+        {
+            DeviceManager::instance().loadAll();
+
+            cout << "Run tests on all supported devices \n" << endl;
+        }
+        else
+        {
+            DeviceManager::instance().load(device);
+
+            DeviceInfo info(device);
+            cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl;
+        }
+        // NOTE(review): "gpu" is presumably the test-data subdirectory name - confirm it is still right for this module.
+        TS::ptr()->init("gpu");
+        InitGoogleTest(&argc, argv);
+
+        return RUN_ALL_TESTS();
+    }
+    catch (const exception& e)
+    {
+        cerr << e.what() << endl;
+        return -1;
+    }
+    catch (...)
+    {
+        cerr << "Unknown error" << endl;
+        return -1;
+    }
+    // Not reached: the try block and both handlers above all end in a return.
+    return 0;
+}
+
+#else // HAVE_CUDA
+
+int main() // Fallback entry point used when HAVE_CUDA is not defined: prints a notice and reports success.
+{
+    printf("OpenCV was built without CUDA support\n");
+    return 0;
+}
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuarithm/test/test_precomp.cpp b/modules/gpuarithm/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpuarithm/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpuarithm/test/test_precomp.hpp b/modules/gpuarithm/test/test_precomp.hpp
new file mode 100644
index 000000000..089627758
--- /dev/null
+++ b/modules/gpuarithm/test/test_precomp.hpp
@@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/core.hpp"
+#include "opencv2/gpuarithm.hpp"
+
+#endif
diff --git a/modules/stitching/CMakeLists.txt b/modules/stitching/CMakeLists.txt
index 5d48bd378..2282d8123 100644
--- a/modules/stitching/CMakeLists.txt
+++ b/modules/stitching/CMakeLists.txt
@@ -1,3 +1,3 @@
 set(the_description "Images stitching")
-ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_nonfree)
+ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_gpuarithm opencv_nonfree)
 
diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt
index d111a79eb..fae24e17e 100644
--- a/modules/superres/CMakeLists.txt
+++ b/modules/superres/CMakeLists.txt
@@ -4,4 +4,4 @@ endif()
 
 set(the_description "Super Resolution")
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
-ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui opencv_gpucodec)
+ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_highgui opencv_gpu opencv_gpucodec opencv_gpuarithm)
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index c7f63bf72..92f7dc790 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -17,6 +17,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
   ocv_include_modules(${OPENCV_CPP_SAMPLES_REQUIRED_DEPS})
 
   if(HAVE_opencv_gpu)
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuarithm/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 015df939b..efff5f939 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec)
+                                     opencv_gpucodec opencv_gpuarithm)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 

From 84de6ce0363b6e894428272d2099ae16b5c42337 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:26:59 +0400
Subject: [PATCH 03/49] gpufilters module for image filtering

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/doc/gpu.rst                       |   1 -
 modules/gpu/include/opencv2/gpu.hpp           | 215 +-------------
 modules/gpu/src/cuda/imgproc.cu               | 106 -------
 modules/gpufilters/CMakeLists.txt             |   9 +
 .../doc/filtering.rst}                        |   0
 modules/gpufilters/doc/gpufilters.rst         |   8 +
 .../gpufilters/include/opencv2/gpufilters.hpp | 269 ++++++++++++++++++
 .../{gpu => gpufilters}/perf/perf_filters.cpp |  18 +-
 modules/gpufilters/perf/perf_main.cpp         |  47 +++
 modules/gpufilters/perf/perf_precomp.cpp      |  43 +++
 modules/gpufilters/perf/perf_precomp.hpp      |  64 +++++
 .../src/cuda/column_filter.16sc1.cu}          |   2 +-
 .../src/cuda/column_filter.16sc3.cu}          |   2 +-
 .../src/cuda/column_filter.16sc4.cu}          |   2 +-
 .../src/cuda/column_filter.16uc1.cu}          |   2 +-
 .../src/cuda/column_filter.16uc3.cu}          |   2 +-
 .../src/cuda/column_filter.16uc4.cu}          |   2 +-
 .../src/cuda/column_filter.32fc1.cu}          |   2 +-
 .../src/cuda/column_filter.32fc3.cu}          |   2 +-
 .../src/cuda/column_filter.32fc4.cu}          |   2 +-
 .../src/cuda/column_filter.32sc1.cu}          |   2 +-
 .../src/cuda/column_filter.32sc3.cu}          |   2 +-
 .../src/cuda/column_filter.32sc4.cu}          |   2 +-
 .../src/cuda/column_filter.8uc1.cu}           |   2 +-
 .../src/cuda/column_filter.8uc3.cu}           |   2 +-
 .../src/cuda/column_filter.8uc4.cu}           |   2 +-
 .../src/cuda/column_filter.hpp}               |   0
 modules/gpufilters/src/cuda/filter2d.cu       | 158 ++++++++++
 .../src/cuda/row_filter.16sc1.cu}             |   2 +-
 .../src/cuda/row_filter.16sc3.cu}             |   2 +-
 .../src/cuda/row_filter.16sc4.cu}             |   2 +-
 .../src/cuda/row_filter.16uc1.cu}             |   2 +-
 .../src/cuda/row_filter.16uc3.cu}             |   2 +-
 .../src/cuda/row_filter.16uc4.cu}             |   2 +-
 .../src/cuda/row_filter.32fc1.cu}             |   2 +-
 .../src/cuda/row_filter.32fc3.cu}             |   2 +-
 .../src/cuda/row_filter.32fc4.cu}             |   2 +-
 .../src/cuda/row_filter.32sc1.cu}             |   2 +-
 .../src/cuda/row_filter.32sc3.cu}             |   2 +-
 .../src/cuda/row_filter.32sc4.cu}             |   2 +-
 .../src/cuda/row_filter.8uc1.cu}              |   2 +-
 .../src/cuda/row_filter.8uc3.cu}              |   2 +-
 .../src/cuda/row_filter.8uc4.cu}              |   2 +-
 .../src/cuda/row_filter.hpp}                  |   0
 modules/{gpu => gpufilters}/src/filtering.cpp |  24 +-
 modules/gpufilters/src/precomp.cpp            |  43 +++
 modules/gpufilters/src/precomp.hpp            |  59 ++++
 .../{gpu => gpufilters}/test/test_filters.cpp |  18 +-
 modules/gpufilters/test/test_main.cpp         | 120 ++++++++
 modules/gpufilters/test/test_precomp.cpp      |  43 +++
 modules/gpufilters/test/test_precomp.hpp      |  60 ++++
 modules/stitching/CMakeLists.txt              |   2 +-
 modules/superres/CMakeLists.txt               |   2 +-
 samples/cpp/CMakeLists.txt                    |   1 +
 samples/gpu/CMakeLists.txt                    |   2 +-
 56 files changed, 995 insertions(+), 379 deletions(-)
 create mode 100644 modules/gpufilters/CMakeLists.txt
 rename modules/{gpu/doc/image_filtering.rst => gpufilters/doc/filtering.rst} (100%)
 create mode 100644 modules/gpufilters/doc/gpufilters.rst
 create mode 100644 modules/gpufilters/include/opencv2/gpufilters.hpp
 rename modules/{gpu => gpufilters}/perf/perf_filters.cpp (89%)
 create mode 100644 modules/gpufilters/perf/perf_main.cpp
 create mode 100644 modules/gpufilters/perf/perf_precomp.cpp
 create mode 100644 modules/gpufilters/perf/perf_precomp.hpp
 rename modules/{gpu/src/cuda/column_filter.8.cu => gpufilters/src/cuda/column_filter.16sc1.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.3.cu => gpufilters/src/cuda/column_filter.16sc3.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.9.cu => gpufilters/src/cuda/column_filter.16sc4.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.10.cu => gpufilters/src/cuda/column_filter.16uc1.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.11.cu => gpufilters/src/cuda/column_filter.16uc3.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.12.cu => gpufilters/src/cuda/column_filter.16uc4.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.5.cu => gpufilters/src/cuda/column_filter.32fc1.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.6.cu => gpufilters/src/cuda/column_filter.32fc3.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.7.cu => gpufilters/src/cuda/column_filter.32fc4.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.4.cu => gpufilters/src/cuda/column_filter.32sc1.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.13.cu => gpufilters/src/cuda/column_filter.32sc3.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.14.cu => gpufilters/src/cuda/column_filter.32sc4.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.0.cu => gpufilters/src/cuda/column_filter.8uc1.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.1.cu => gpufilters/src/cuda/column_filter.8uc3.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.2.cu => gpufilters/src/cuda/column_filter.8uc4.cu} (98%)
 rename modules/{gpu/src/cuda/column_filter.h => gpufilters/src/cuda/column_filter.hpp} (100%)
 create mode 100644 modules/gpufilters/src/cuda/filter2d.cu
 rename modules/{gpu/src/cuda/row_filter.8.cu => gpufilters/src/cuda/row_filter.16sc1.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.3.cu => gpufilters/src/cuda/row_filter.16sc3.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.9.cu => gpufilters/src/cuda/row_filter.16sc4.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.10.cu => gpufilters/src/cuda/row_filter.16uc1.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.11.cu => gpufilters/src/cuda/row_filter.16uc3.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.12.cu => gpufilters/src/cuda/row_filter.16uc4.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.5.cu => gpufilters/src/cuda/row_filter.32fc1.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.6.cu => gpufilters/src/cuda/row_filter.32fc3.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.7.cu => gpufilters/src/cuda/row_filter.32fc4.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.4.cu => gpufilters/src/cuda/row_filter.32sc1.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.13.cu => gpufilters/src/cuda/row_filter.32sc3.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.14.cu => gpufilters/src/cuda/row_filter.32sc4.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.0.cu => gpufilters/src/cuda/row_filter.8uc1.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.1.cu => gpufilters/src/cuda/row_filter.8uc3.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.2.cu => gpufilters/src/cuda/row_filter.8uc4.cu} (98%)
 rename modules/{gpu/src/cuda/row_filter.h => gpufilters/src/cuda/row_filter.hpp} (100%)
 rename modules/{gpu => gpufilters}/src/filtering.cpp (99%)
 create mode 100644 modules/gpufilters/src/precomp.cpp
 create mode 100644 modules/gpufilters/src/precomp.hpp
 rename modules/{gpu => gpufilters}/test/test_filters.cpp (96%)
 create mode 100644 modules/gpufilters/test/test_main.cpp
 create mode 100644 modules/gpufilters/test/test_precomp.cpp
 create mode 100644 modules/gpufilters/test/test_precomp.hpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 2f884b3f9..1ed980694 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -3,7 +3,7 @@ if(ANDROID OR IOS)
 endif()
 
 set(the_description "GPU-accelerated Computer Vision")
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm)
+ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters)
 
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
 
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index f17ed7079..de52ceaba 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -11,6 +11,5 @@ gpu. GPU-accelerated Computer Vision
     image_processing
     object_detection
     feature_detection_and_description
-    image_filtering
     camera_calibration_and_3d_reconstruction
     video
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index cfad81738..8f837da08 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -51,225 +51,12 @@
 
 #include "opencv2/core/gpumat.hpp"
 #include "opencv2/gpuarithm.hpp"
+#include "opencv2/gpufilters.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect.hpp"
 #include "opencv2/features2d.hpp"
 
 namespace cv { namespace gpu {
-//////////////////////////////// Filter Engine ////////////////////////////////
-
-/*!
-The Base Class for 1D or Row-wise Filters
-
-This is the base class for linear or non-linear filters that process 1D data.
-In particular, such filters are used for the "horizontal" filtering parts in separable filters.
-*/
-class CV_EXPORTS BaseRowFilter_GPU
-{
-public:
-    BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
-    virtual ~BaseRowFilter_GPU() {}
-    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
-    int ksize, anchor;
-};
-
-/*!
-The Base Class for Column-wise Filters
-
-This is the base class for linear or non-linear filters that process columns of 2D arrays.
-Such filters are used for the "vertical" filtering parts in separable filters.
-*/
-class CV_EXPORTS BaseColumnFilter_GPU
-{
-public:
-    BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
-    virtual ~BaseColumnFilter_GPU() {}
-    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
-    int ksize, anchor;
-};
-
-/*!
-The Base Class for Non-Separable 2D Filters.
-
-This is the base class for linear or non-linear 2D filters.
-*/
-class CV_EXPORTS BaseFilter_GPU
-{
-public:
-    BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {}
-    virtual ~BaseFilter_GPU() {}
-    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
-    Size ksize;
-    Point anchor;
-};
-
-/*!
-The Base Class for Filter Engine.
-
-The class can be used to apply an arbitrary filtering operation to an image.
-It contains all the necessary intermediate buffers.
-*/
-class CV_EXPORTS FilterEngine_GPU
-{
-public:
-    virtual ~FilterEngine_GPU() {}
-
-    virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0;
-};
-
-//! returns the non-separable filter engine with the specified filter
-CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU>& filter2D, int srcType, int dstType);
-
-//! returns the separable filter engine with the specified filters
-CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
-    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
-CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
-    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf);
-
-//! returns horizontal 1D box filter
-//! supports only CV_8UC1 source type and CV_32FC1 sum type
-CV_EXPORTS Ptr<BaseRowFilter_GPU> getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1);
-
-//! returns vertical 1D box filter
-//! supports only CV_8UC1 sum type and CV_32FC1 dst type
-CV_EXPORTS Ptr<BaseColumnFilter_GPU> getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1);
-
-//! returns 2D box filter
-//! supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
-CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1));
-
-//! returns box filter engine
-CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size& ksize,
-    const Point& anchor = Point(-1,-1));
-
-//! returns 2D morphological filter
-//! only MORPH_ERODE and MORPH_DILATE are supported
-//! supports CV_8UC1 and CV_8UC4 types
-//! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
-CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize,
-    Point anchor=Point(-1,-1));
-
-//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
-CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel,
-    const Point& anchor = Point(-1,-1), int iterations = 1);
-CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
-    const Point& anchor = Point(-1,-1), int iterations = 1);
-
-//! returns 2D filter with the specified kernel
-//! supports CV_8U, CV_16U and CV_32F one and four channel image
-CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
-//! returns the non-separable linear filter engine
-CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
-    Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT);
-
-//! returns the primitive row filter with the specified kernel.
-//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
-//! there are two version of algorithm: NPP and OpenCV.
-//! NPP calls when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType,
-//! otherwise calls OpenCV version.
-//! NPP supports only BORDER_CONSTANT border type.
-//! OpenCV version supports only CV_32F as buffer depth and
-//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
-CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
-    int anchor = -1, int borderType = BORDER_DEFAULT);
-
-//! returns the primitive column filter with the specified kernel.
-//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type.
-//! there are two version of algorithm: NPP and OpenCV.
-//! NPP calls when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType,
-//! otherwise calls OpenCV version.
-//! NPP supports only BORDER_CONSTANT border type.
-//! OpenCV version supports only CV_32F as buffer depth and
-//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
-CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
-    int anchor = -1, int borderType = BORDER_DEFAULT);
-
-//! returns the separable linear filter engine
-CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
-    const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
-    int columnBorderType = -1);
-CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
-    const Mat& columnKernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
-    int columnBorderType = -1);
-
-//! returns filter engine for the generalized Sobel operator
-CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize,
-                                                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf,
-                                                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-
-//! returns the Gaussian filter engine
-CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0,
-                                                          int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
-                                                          int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-
-//! returns maximum filter
-CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
-
-//! returns minimum filter
-CV_EXPORTS Ptr<BaseFilter_GPU> getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
-
-//! smooths the image using the normalized box filter
-//! supports CV_8UC1, CV_8UC4 types
-CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
-
-//! a synonym for normalized box filter
-static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null())
-{
-    boxFilter(src, dst, -1, ksize, anchor, stream);
-}
-
-//! erodes the image (applies the local minimum operator)
-CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
-CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
-                      Point anchor = Point(-1, -1), int iterations = 1,
-                      Stream& stream = Stream::Null());
-
-//! dilates the image (applies the local maximum operator)
-CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
-CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
-                       Point anchor = Point(-1, -1), int iterations = 1,
-                       Stream& stream = Stream::Null());
-
-//! applies an advanced morphological operation to the image
-CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
-CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
-                             Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
-
-//! applies non-separable 2D linear filter to the image
-CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
-
-//! applies separable 2D linear filter to the image
-CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
-                            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
-                            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1,
-                            Stream& stream = Stream::Null());
-
-//! applies generalized Sobel operator to the image
-CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
-                      int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize = 3, double scale = 1,
-                      int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
-
-//! applies the vertical or horizontal Scharr operator to the image
-CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
-                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale = 1,
-                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
-
-//! smooths the image using Gaussian filter.
-CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
-                             int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
-                             int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
-
-//! applies Laplacian operator to the image
-//! supports only ksize = 1 and ksize = 3
-CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
-
 ////////////////////////////// Image processing //////////////////////////////
 
 
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu
index fc27ec19a..71f5e872c 100644
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -895,112 +895,6 @@ namespace cv { namespace gpu { namespace cudev
             if (stream == 0)
                 cudaSafeCall(cudaDeviceSynchronize());
         }
-
-        //////////////////////////////////////////////////////////////////////////
-        // filter2D
-
-        #define FILTER2D_MAX_KERNEL_SIZE 16
-
-        __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
-
-        template <class SrcT, typename D>
-        __global__ void filter2D(const SrcT src, PtrStepSz<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
-        {
-            typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
-
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= dst.cols || y >= dst.rows)
-                return;
-
-            sum_t res = VecTraits<sum_t>::all(0);
-            int kInd = 0;
-
-            for (int i = 0; i < kHeight; ++i)
-            {
-                for (int j = 0; j < kWidth; ++j)
-                    res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++];
-            }
-
-            dst(y, x) = saturate_cast<D>(res);
-        }
-
-        template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
-
-        #define IMPLEMENT_FILTER2D_TEX_READER(type) \
-            texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
-            struct tex_filter2D_ ## type ## _reader \
-            { \
-                typedef type elem_type; \
-                typedef int index_type; \
-                const int xoff; \
-                const int yoff; \
-                tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
-                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
-                { \
-                    return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
-                } \
-            }; \
-            template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
-            { \
-                static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, \
-                    int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
-                { \
-                    typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
-                    dim3 block(16, 16); \
-                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
-                    bindTexture(&tex_filter2D_ ## type , srcWhole); \
-                    tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
-                    Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
-                    BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
-                    filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
-                    cudaSafeCall( cudaGetLastError() ); \
-                    if (stream == 0) \
-                        cudaSafeCall( cudaDeviceSynchronize() ); \
-                } \
-            };
-
-        IMPLEMENT_FILTER2D_TEX_READER(uchar);
-        IMPLEMENT_FILTER2D_TEX_READER(uchar4);
-
-        IMPLEMENT_FILTER2D_TEX_READER(ushort);
-        IMPLEMENT_FILTER2D_TEX_READER(ushort4);
-
-        IMPLEMENT_FILTER2D_TEX_READER(float);
-        IMPLEMENT_FILTER2D_TEX_READER(float4);
-
-        #undef IMPLEMENT_FILTER2D_TEX_READER
-
-        template <typename T, typename D>
-        void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
-                          int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
-                          int borderMode, const float* borderValue, cudaStream_t stream)
-        {
-            typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
-            static const func_t funcs[] =
-            {
-                Filter2DCaller<T, D, BrdReflect101>::call,
-                Filter2DCaller<T, D, BrdReplicate>::call,
-                Filter2DCaller<T, D, BrdConstant>::call,
-                Filter2DCaller<T, D, BrdReflect>::call,
-                Filter2DCaller<T, D, BrdWrap>::call
-            };
-
-            if (stream == 0)
-                cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
-            else
-                cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
-
-            funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
-        }
-
-        template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
     } // namespace imgproc
 }}} // namespace cv { namespace gpu { namespace cudev {
 
diff --git a/modules/gpufilters/CMakeLists.txt b/modules/gpufilters/CMakeLists.txt
new file mode 100644
index 000000000..18f6d7f7b
--- /dev/null
+++ b/modules/gpufilters/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpufilters)
+endif()
+
+set(the_description "GPU-accelerated Image Filtering")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)
+
+ocv_define_module(gpufilters opencv_imgproc OPTIONAL opencv_gpuarithm)
diff --git a/modules/gpu/doc/image_filtering.rst b/modules/gpufilters/doc/filtering.rst
similarity index 100%
rename from modules/gpu/doc/image_filtering.rst
rename to modules/gpufilters/doc/filtering.rst
diff --git a/modules/gpufilters/doc/gpufilters.rst b/modules/gpufilters/doc/gpufilters.rst
new file mode 100644
index 000000000..778554b13
--- /dev/null
+++ b/modules/gpufilters/doc/gpufilters.rst
@@ -0,0 +1,8 @@
+*******************************************
+gpufilters. GPU-accelerated Image Filtering
+*******************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    filtering
diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp
new file mode 100644
index 000000000..853755de4
--- /dev/null
+++ b/modules/gpufilters/include/opencv2/gpufilters.hpp
@@ -0,0 +1,269 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUFILTERS_HPP__
+#define __OPENCV_GPUFILTERS_HPP__
+
+#ifndef __cplusplus
+#  error gpufilters.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/gpumat.hpp"
+#include "opencv2/core/base.hpp"
+
+namespace cv { namespace gpu {
+
+/*!
+The Base Class for 1D or Row-wise Filters
+
+This is the base class for linear or non-linear filters that process 1D data.
+In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+*/
+class CV_EXPORTS BaseRowFilter_GPU
+{
+public:
+    BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
+    virtual ~BaseRowFilter_GPU() {}
+    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+    int ksize, anchor;
+};
+
+/*!
+The Base Class for Column-wise Filters
+
+This is the base class for linear or non-linear filters that process columns of 2D arrays.
+Such filters are used for the "vertical" filtering parts in separable filters.
+*/
+class CV_EXPORTS BaseColumnFilter_GPU
+{
+public:
+    BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
+    virtual ~BaseColumnFilter_GPU() {}
+    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+    int ksize, anchor;
+};
+
+/*!
+The Base Class for Non-Separable 2D Filters.
+
+This is the base class for linear or non-linear 2D filters.
+*/
+class CV_EXPORTS BaseFilter_GPU
+{
+public:
+    BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {}
+    virtual ~BaseFilter_GPU() {}
+    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+    Size ksize;
+    Point anchor;
+};
+
+/*!
+The Base Class for Filter Engine.
+
+The class can be used to apply an arbitrary filtering operation to an image.
+It contains all the necessary intermediate buffers.
+*/
+class CV_EXPORTS FilterEngine_GPU
+{
+public:
+    virtual ~FilterEngine_GPU() {}
+
+    virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0;
+};
+
+//! returns the non-separable filter engine with the specified filter
+CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU>& filter2D, int srcType, int dstType);
+
+//! returns the separable filter engine with the specified filters
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
+    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
+    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf);
+
+//! returns horizontal 1D box filter
+//! supports only CV_8UC1 source type and CV_32FC1 sum type
+CV_EXPORTS Ptr<BaseRowFilter_GPU> getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1);
+
+//! returns vertical 1D box filter
+//! supports only CV_8UC1 sum type and CV_32FC1 dst type
+CV_EXPORTS Ptr<BaseColumnFilter_GPU> getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1);
+
+//! returns 2D box filter
+//! supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
+CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1));
+
+//! returns box filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size& ksize,
+    const Point& anchor = Point(-1,-1));
+
+//! returns 2D morphological filter
+//! only MORPH_ERODE and MORPH_DILATE are supported
+//! supports CV_8UC1 and CV_8UC4 types
+//! kernel must have CV_8UC1 type, a single row, and cols == ksize.width * ksize.height
+CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize,
+    Point anchor=Point(-1,-1));
+
+//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel,
+    const Point& anchor = Point(-1,-1), int iterations = 1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
+    const Point& anchor = Point(-1,-1), int iterations = 1);
+
+//! returns 2D filter with the specified kernel
+//! supports one- and four-channel images of CV_8U, CV_16U and CV_32F depth
+CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+//! returns the non-separable linear filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
+    Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT);
+
+//! returns the primitive row filter with the specified kernel.
+//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source types.
+//! there are two versions of the algorithm: NPP and OpenCV.
+//! the NPP version is called when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType,
+//! otherwise the OpenCV version is called.
+//! the NPP version supports only the BORDER_CONSTANT border type.
+//! the OpenCV version supports only CV_32F as buffer depth and
+//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
+CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
+    int anchor = -1, int borderType = BORDER_DEFAULT);
+
+//! returns the primitive column filter with the specified kernel.
+//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst types.
+//! there are two versions of the algorithm: NPP and OpenCV.
+//! the NPP version is called when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType,
+//! otherwise the OpenCV version is called.
+//! the NPP version supports only the BORDER_CONSTANT border type.
+//! the OpenCV version supports only CV_32F as buffer depth and
+//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
+CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
+    int anchor = -1, int borderType = BORDER_DEFAULT);
+
+//! returns the separable linear filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
+    const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
+    int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
+    const Mat& columnKernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
+    int columnBorderType = -1);
+
+//! returns filter engine for the generalized Sobel operator
+CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize,
+                                                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf,
+                                                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+
+//! returns the Gaussian filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0,
+                                                          int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
+                                                          int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+
+//! returns maximum filter
+CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
+
+//! returns minimum filter
+CV_EXPORTS Ptr<BaseFilter_GPU> getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
+
+//! smooths the image using the normalized box filter
+//! supports CV_8UC1, CV_8UC4 types
+CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
+
+//! a synonym for normalized box filter
+static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null())
+{
+    boxFilter(src, dst, -1, ksize, anchor, stream);
+}
+
+//! erodes the image (applies the local minimum operator)
+CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
+                      Point anchor = Point(-1, -1), int iterations = 1,
+                      Stream& stream = Stream::Null());
+
+//! dilates the image (applies the local maximum operator)
+CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
+                       Point anchor = Point(-1, -1), int iterations = 1,
+                       Stream& stream = Stream::Null());
+
+//! applies an advanced morphological operation to the image
+CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
+                             Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
+
+//! applies non-separable 2D linear filter to the image
+CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
+
+//! applies separable 2D linear filter to the image
+CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
+                            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
+                            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1,
+                            Stream& stream = Stream::Null());
+
+//! applies generalized Sobel operator to the image
+CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
+                      int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize = 3, double scale = 1,
+                      int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! applies the vertical or horizontal Scharr operator to the image
+CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
+                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale = 1,
+                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! smooths the image using Gaussian filter.
+CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
+                             int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
+                             int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! applies Laplacian operator to the image
+//! supports only ksize = 1 and ksize = 3
+CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUFILTERS_HPP__ */
diff --git a/modules/gpu/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp
similarity index 89%
rename from modules/gpu/perf/perf_filters.cpp
rename to modules/gpufilters/perf/perf_filters.cpp
index 40d88aad4..0dc506bc9 100644
--- a/modules/gpu/perf/perf_filters.cpp
+++ b/modules/gpufilters/perf/perf_filters.cpp
@@ -51,7 +51,7 @@ using namespace perf;
 
 DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
 
-PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
+PERF_TEST_P(Sz_Type_KernelSz, Blur,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8UC1, CV_8UC4),
                     Values(3, 5, 7)))
@@ -87,7 +87,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
 //////////////////////////////////////////////////////////////////////
 // Sobel
 
-PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
+PERF_TEST_P(Sz_Type_KernelSz, Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
 {
     declare.time(20.0);
 
@@ -121,7 +121,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Valu
 //////////////////////////////////////////////////////////////////////
 // Scharr
 
-PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
+PERF_TEST_P(Sz_Type, Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
 {
     declare.time(20.0);
 
@@ -154,7 +154,7 @@ PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
 //////////////////////////////////////////////////////////////////////
 // GaussianBlur
 
-PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
+PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
 {
     declare.time(20.0);
 
@@ -188,7 +188,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZE
 //////////////////////////////////////////////////////////////////////
 // Laplacian
 
-PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
+PERF_TEST_P(Sz_Type_KernelSz, Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
 {
     declare.time(20.0);
 
@@ -221,7 +221,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES,
 //////////////////////////////////////////////////////////////////////
 // Erode
 
-PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
+PERF_TEST_P(Sz_Type, Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
 {
     declare.time(20.0);
 
@@ -256,7 +256,7 @@ PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC
 //////////////////////////////////////////////////////////////////////
 // Dilate
 
-PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
+PERF_TEST_P(Sz_Type, Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
 {
     declare.time(20.0);
 
@@ -295,7 +295,7 @@ CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BL
 
 DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);
 
-PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), MorphOp::all()))
+PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), MorphOp::all()))
 {
     declare.time(20.0);
 
@@ -332,7 +332,7 @@ PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Val
 //////////////////////////////////////////////////////////////////////
 // Filter2D
 
-PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
+PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
 {
     declare.time(20.0);
 
diff --git a/modules/gpufilters/perf/perf_main.cpp b/modules/gpufilters/perf/perf_main.cpp
new file mode 100644
index 000000000..b5a3eda40
--- /dev/null
+++ b/modules/gpufilters/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
+CV_PERF_TEST_MAIN(gpufilters, printCudaInfo())
diff --git a/modules/gpufilters/perf/perf_precomp.cpp b/modules/gpufilters/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpufilters/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpufilters/perf/perf_precomp.hpp b/modules/gpufilters/perf/perf_precomp.hpp
new file mode 100644
index 000000000..02ca5ceac
--- /dev/null
+++ b/modules/gpufilters/perf/perf_precomp.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpufilters.hpp"
+#include "opencv2/imgproc.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpu/src/cuda/column_filter.8.cu b/modules/gpufilters/src/cuda/column_filter.16sc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.8.cu
rename to modules/gpufilters/src/cuda/column_filter.16sc1.cu
index 0a63a1dd4..d4c6d19ab 100644
--- a/modules/gpu/src/cuda/column_filter.8.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16sc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.3.cu b/modules/gpufilters/src/cuda/column_filter.16sc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.3.cu
rename to modules/gpufilters/src/cuda/column_filter.16sc3.cu
index 7304565b9..419fdea65 100644
--- a/modules/gpu/src/cuda/column_filter.3.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16sc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.9.cu b/modules/gpufilters/src/cuda/column_filter.16sc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.9.cu
rename to modules/gpufilters/src/cuda/column_filter.16sc4.cu
index 758d9289d..1caeb8775 100644
--- a/modules/gpu/src/cuda/column_filter.9.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16sc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.10.cu b/modules/gpufilters/src/cuda/column_filter.16uc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.10.cu
rename to modules/gpufilters/src/cuda/column_filter.16uc1.cu
index b71e25207..dc68b710f 100644
--- a/modules/gpu/src/cuda/column_filter.10.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16uc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.11.cu b/modules/gpufilters/src/cuda/column_filter.16uc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.11.cu
rename to modules/gpufilters/src/cuda/column_filter.16uc3.cu
index ccfbf8e77..f0a07d6dd 100644
--- a/modules/gpu/src/cuda/column_filter.11.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16uc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.12.cu b/modules/gpufilters/src/cuda/column_filter.16uc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.12.cu
rename to modules/gpufilters/src/cuda/column_filter.16uc4.cu
index a38f93b53..638ef794a 100644
--- a/modules/gpu/src/cuda/column_filter.12.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16uc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.5.cu b/modules/gpufilters/src/cuda/column_filter.32fc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.5.cu
rename to modules/gpufilters/src/cuda/column_filter.32fc1.cu
index a19266030..aa30933e6 100644
--- a/modules/gpu/src/cuda/column_filter.5.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32fc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.6.cu b/modules/gpufilters/src/cuda/column_filter.32fc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.6.cu
rename to modules/gpufilters/src/cuda/column_filter.32fc3.cu
index f4f7c4ffb..c0ed3ac3c 100644
--- a/modules/gpu/src/cuda/column_filter.6.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32fc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.7.cu b/modules/gpufilters/src/cuda/column_filter.32fc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.7.cu
rename to modules/gpufilters/src/cuda/column_filter.32fc4.cu
index 9f94bed9d..f37f71792 100644
--- a/modules/gpu/src/cuda/column_filter.7.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32fc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.4.cu b/modules/gpufilters/src/cuda/column_filter.32sc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.4.cu
rename to modules/gpufilters/src/cuda/column_filter.32sc1.cu
index 8c9db6985..ee052050d 100644
--- a/modules/gpu/src/cuda/column_filter.4.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32sc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.13.cu b/modules/gpufilters/src/cuda/column_filter.32sc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.13.cu
rename to modules/gpufilters/src/cuda/column_filter.32sc3.cu
index 40eec7a83..b921d9610 100644
--- a/modules/gpu/src/cuda/column_filter.13.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32sc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.14.cu b/modules/gpufilters/src/cuda/column_filter.32sc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.14.cu
rename to modules/gpufilters/src/cuda/column_filter.32sc4.cu
index 08151ac6d..dd21524c5 100644
--- a/modules/gpu/src/cuda/column_filter.14.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32sc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.0.cu b/modules/gpufilters/src/cuda/column_filter.8uc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.0.cu
rename to modules/gpufilters/src/cuda/column_filter.8uc1.cu
index 339fb8070..470f3ee8e 100644
--- a/modules/gpu/src/cuda/column_filter.0.cu
+++ b/modules/gpufilters/src/cuda/column_filter.8uc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.1.cu b/modules/gpufilters/src/cuda/column_filter.8uc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.1.cu
rename to modules/gpufilters/src/cuda/column_filter.8uc3.cu
index 53914a215..5d5be5831 100644
--- a/modules/gpu/src/cuda/column_filter.1.cu
+++ b/modules/gpufilters/src/cuda/column_filter.8uc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.2.cu b/modules/gpufilters/src/cuda/column_filter.8uc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/column_filter.2.cu
rename to modules/gpufilters/src/cuda/column_filter.8uc4.cu
index a615944cb..8a322f299 100644
--- a/modules/gpu/src/cuda/column_filter.2.cu
+++ b/modules/gpufilters/src/cuda/column_filter.8uc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "column_filter.h"
+#include "column_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/column_filter.h b/modules/gpufilters/src/cuda/column_filter.hpp
similarity index 100%
rename from modules/gpu/src/cuda/column_filter.h
rename to modules/gpufilters/src/cuda/column_filter.hpp
diff --git a/modules/gpufilters/src/cuda/filter2d.cu b/modules/gpufilters/src/cuda/filter2d.cu
new file mode 100644
index 000000000..0bb5fcd87
--- /dev/null
+++ b/modules/gpufilters/src/cuda/filter2d.cu
@@ -0,0 +1,158 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/border_interpolate.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        #define FILTER2D_MAX_KERNEL_SIZE 16 // side length used to size the constant coefficient buffer below
+        // Filter coefficients in constant memory; the kernel below reads them sequentially through kInd.
+        __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
+        // One thread per dst pixel: sums src samples weighted by c_filter2DKernel over a kWidth x kHeight window.
+        template <class SrcT, typename D>
+        __global__ void filter2D(const SrcT src, PtrStepSz<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
+        {
+            typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t; // float accumulator with the same channel count as D
+            // Pixel coordinates handled by this thread.
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+            // Threads that fall outside dst have nothing to do.
+            if (x >= dst.cols || y >= dst.rows)
+                return;
+            // Accumulate the weighted window; kInd walks the coefficients row by row (i outer, j inner).
+            sum_t res = VecTraits<sum_t>::all(0);
+            int kInd = 0;
+            // The window starts at (x - anchorX, y - anchorY); src is invoked as src(row, col) and may be asked for out-of-image coordinates.
+            for (int i = 0; i < kHeight; ++i)
+            {
+                for (int j = 0; j < kWidth; ++j)
+                    res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++];
+            }
+            // Convert the float sum to the destination element type via saturate_cast.
+            dst(y, x) = saturate_cast<D>(res);
+        }
+
+        template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller; // declared only; specialised per source type by the macro below
+        // For one source element type: declares a 2D texture reference, a reader functor over it, and the matching Filter2DCaller specialisation.
+        #define IMPLEMENT_FILTER2D_TEX_READER(type) \
+            texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
+            struct tex_filter2D_ ## type ## _reader \
+            { \
+                typedef type elem_type; \
+                typedef int index_type; \
+                const int xoff; \
+                const int yoff; \
+                tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
+                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
+                { \
+                    return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); /* coordinates are shifted by (xoff, yoff) before the fetch */ \
+                } \
+            }; \
+            template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
+            { \
+                static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, \
+                    int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
+                { \
+                    typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
+                    dim3 block(16, 16); \
+                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); /* enough 16x16 blocks to cover dst */ \
+                    bindTexture(&tex_filter2D_ ## type , srcWhole); \
+                    tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
+                    Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
+                    BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); /* reader wrapped with the border policy Brd */ \
+                    filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
+                    cudaSafeCall( cudaGetLastError() ); \
+                    if (stream == 0) /* stream 0: wait for the kernel to finish before returning */ \
+                        cudaSafeCall( cudaDeviceSynchronize() ); \
+                } \
+            };
+        // Expand the macro once per source element type; the groups below are 8-bit, 16-bit and float, each with a 4-component variant.
+        IMPLEMENT_FILTER2D_TEX_READER(uchar);
+        IMPLEMENT_FILTER2D_TEX_READER(uchar4);
+
+        IMPLEMENT_FILTER2D_TEX_READER(ushort);
+        IMPLEMENT_FILTER2D_TEX_READER(ushort4);
+
+        IMPLEMENT_FILTER2D_TEX_READER(float);
+        IMPLEMENT_FILTER2D_TEX_READER(float4);
+        // The helper macro is not needed past this point.
+        #undef IMPLEMENT_FILTER2D_TEX_READER
+
+        template <typename T, typename D> // T: source element type, D: destination element type
+        void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
+                          int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
+                          int borderMode, const float* borderValue, cudaStream_t stream)
+        { // Host entry point: copies the coefficients into c_filter2DKernel, then dispatches to the caller selected by borderMode.
+            typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
+            static const func_t funcs[] = // indexed by borderMode: 0 Reflect101, 1 Replicate, 2 Constant, 3 Reflect, 4 Wrap
+            {
+                Filter2DCaller<T, D, BrdReflect101>::call,
+                Filter2DCaller<T, D, BrdReplicate>::call,
+                Filter2DCaller<T, D, BrdConstant>::call,
+                Filter2DCaller<T, D, BrdReflect>::call,
+                Filter2DCaller<T, D, BrdWrap>::call
+            };
+            // NOTE(review): kWidth * kHeight is not checked against the c_filter2DKernel capacity here - confirm the caller enforces it.
+            if (stream == 0)
+                cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); // 'kernel' is copied as device memory
+            else
+                cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
+            // NOTE(review): borderMode is used as a raw table index with no range check in this function - confirm callers validate it.
+            funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
+        }
+        // Explicit instantiations: source and destination element types are identical in every pair.
+        template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+    }
+}}}
+
+#endif // CUDA_DISABLER
diff --git a/modules/gpu/src/cuda/row_filter.8.cu b/modules/gpufilters/src/cuda/row_filter.16sc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.8.cu
rename to modules/gpufilters/src/cuda/row_filter.16sc1.cu
index b899e87a7..59ebb9f5f 100644
--- a/modules/gpu/src/cuda/row_filter.8.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16sc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.3.cu b/modules/gpufilters/src/cuda/row_filter.16sc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.3.cu
rename to modules/gpufilters/src/cuda/row_filter.16sc3.cu
index fe8466695..fcf40d81e 100644
--- a/modules/gpu/src/cuda/row_filter.3.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16sc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.9.cu b/modules/gpufilters/src/cuda/row_filter.16sc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.9.cu
rename to modules/gpufilters/src/cuda/row_filter.16sc4.cu
index 516dd8fe7..c5d472692 100644
--- a/modules/gpu/src/cuda/row_filter.9.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16sc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.10.cu b/modules/gpufilters/src/cuda/row_filter.16uc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.10.cu
rename to modules/gpufilters/src/cuda/row_filter.16uc1.cu
index 7d93ee31a..02e125abc 100644
--- a/modules/gpu/src/cuda/row_filter.10.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16uc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.11.cu b/modules/gpufilters/src/cuda/row_filter.16uc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.11.cu
rename to modules/gpufilters/src/cuda/row_filter.16uc3.cu
index 31bccc48b..494c604b4 100644
--- a/modules/gpu/src/cuda/row_filter.11.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16uc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.12.cu b/modules/gpufilters/src/cuda/row_filter.16uc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.12.cu
rename to modules/gpufilters/src/cuda/row_filter.16uc4.cu
index 7be543f6b..1eb1ac25a 100644
--- a/modules/gpu/src/cuda/row_filter.12.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16uc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.5.cu b/modules/gpufilters/src/cuda/row_filter.32fc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.5.cu
rename to modules/gpufilters/src/cuda/row_filter.32fc1.cu
index 975aea4a1..bf577c6b7 100644
--- a/modules/gpu/src/cuda/row_filter.5.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32fc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.6.cu b/modules/gpufilters/src/cuda/row_filter.32fc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.6.cu
rename to modules/gpufilters/src/cuda/row_filter.32fc3.cu
index d5894452b..594fc04b5 100644
--- a/modules/gpu/src/cuda/row_filter.6.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32fc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.7.cu b/modules/gpufilters/src/cuda/row_filter.32fc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.7.cu
rename to modules/gpufilters/src/cuda/row_filter.32fc4.cu
index ac3fcc14b..5f2812bb3 100644
--- a/modules/gpu/src/cuda/row_filter.7.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32fc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.4.cu b/modules/gpufilters/src/cuda/row_filter.32sc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.4.cu
rename to modules/gpufilters/src/cuda/row_filter.32sc1.cu
index 050f7af04..67f3fb04c 100644
--- a/modules/gpu/src/cuda/row_filter.4.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32sc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.13.cu b/modules/gpufilters/src/cuda/row_filter.32sc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.13.cu
rename to modules/gpufilters/src/cuda/row_filter.32sc3.cu
index bd700b1bb..8e881a22a 100644
--- a/modules/gpu/src/cuda/row_filter.13.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32sc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.14.cu b/modules/gpufilters/src/cuda/row_filter.32sc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.14.cu
rename to modules/gpufilters/src/cuda/row_filter.32sc4.cu
index 97df2f128..66f00cf06 100644
--- a/modules/gpu/src/cuda/row_filter.14.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32sc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.0.cu b/modules/gpufilters/src/cuda/row_filter.8uc1.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.0.cu
rename to modules/gpufilters/src/cuda/row_filter.8uc1.cu
index a4b423984..c94b39f1b 100644
--- a/modules/gpu/src/cuda/row_filter.0.cu
+++ b/modules/gpufilters/src/cuda/row_filter.8uc1.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.1.cu b/modules/gpufilters/src/cuda/row_filter.8uc3.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.1.cu
rename to modules/gpufilters/src/cuda/row_filter.8uc3.cu
index ac4724f8c..1c924c10b 100644
--- a/modules/gpu/src/cuda/row_filter.1.cu
+++ b/modules/gpufilters/src/cuda/row_filter.8uc3.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.2.cu b/modules/gpufilters/src/cuda/row_filter.8uc4.cu
similarity index 98%
rename from modules/gpu/src/cuda/row_filter.2.cu
rename to modules/gpufilters/src/cuda/row_filter.8uc4.cu
index d630b6fc5..1ae9651a8 100644
--- a/modules/gpu/src/cuda/row_filter.2.cu
+++ b/modules/gpufilters/src/cuda/row_filter.8uc4.cu
@@ -42,7 +42,7 @@
 
 #if !defined CUDA_DISABLER
 
-#include "row_filter.h"
+#include "row_filter.hpp"
 
 namespace filter
 {
diff --git a/modules/gpu/src/cuda/row_filter.h b/modules/gpufilters/src/cuda/row_filter.hpp
similarity index 100%
rename from modules/gpu/src/cuda/row_filter.h
rename to modules/gpufilters/src/cuda/row_filter.hpp
diff --git a/modules/gpu/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp
similarity index 99%
rename from modules/gpu/src/filtering.cpp
rename to modules/gpufilters/src/filtering.cpp
index 33d179b7e..6416325e1 100644
--- a/modules/gpu/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
@@ -45,7 +45,6 @@
 using namespace cv;
 using namespace cv::gpu;
 
-
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
 Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>&, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
@@ -628,31 +627,44 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke
 {
     switch( op )
     {
-    case MORPH_ERODE:   erode(src, dst, kernel, buf1, anchor, iterations, stream); break;
-    case MORPH_DILATE: dilate(src, dst, kernel, buf1, anchor, iterations, stream); break;
+    case MORPH_ERODE:
+        erode(src, dst, kernel, buf1, anchor, iterations, stream);
+        break;
+
+    case MORPH_DILATE:
+        dilate(src, dst, kernel, buf1, anchor, iterations, stream);
+        break;
+
     case MORPH_OPEN:
         erode(src, buf2, kernel, buf1, anchor, iterations, stream);
         dilate(buf2, dst, kernel, buf1, anchor, iterations, stream);
         break;
+
     case MORPH_CLOSE:
         dilate(src, buf2, kernel, buf1, anchor, iterations, stream);
         erode(buf2, dst, kernel, buf1, anchor, iterations, stream);
         break;
+
+#ifdef HAVE_OPENCV_GPUARITHM
     case MORPH_GRADIENT:
         erode(src, buf2, kernel, buf1, anchor, iterations, stream);
         dilate(src, dst, kernel, buf1, anchor, iterations, stream);
-        subtract(dst, buf2, dst, GpuMat(), -1, stream);
+        gpu::subtract(dst, buf2, dst, GpuMat(), -1, stream);
         break;
+
     case MORPH_TOPHAT:
         erode(src, dst, kernel, buf1, anchor, iterations, stream);
         dilate(dst, buf2, kernel, buf1, anchor, iterations, stream);
-        subtract(src, buf2, dst, GpuMat(), -1, stream);
+        gpu::subtract(src, buf2, dst, GpuMat(), -1, stream);
         break;
+
     case MORPH_BLACKHAT:
         dilate(src, dst, kernel, buf1, anchor, iterations, stream);
         erode(dst, buf2, kernel, buf1, anchor, iterations, stream);
-        subtract(buf2, src, dst, GpuMat(), -1, stream);
+        gpu::subtract(buf2, src, dst, GpuMat(), -1, stream);
         break;
+#endif
+
     default:
         CV_Error(cv::Error::StsBadArg, "unknown morphological operation");
     }
diff --git a/modules/gpufilters/src/precomp.cpp b/modules/gpufilters/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpufilters/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpufilters/src/precomp.hpp b/modules/gpufilters/src/precomp.hpp
new file mode 100644
index 000000000..e8e46ffda
--- /dev/null
+++ b/modules/gpufilters/src/precomp.hpp
@@ -0,0 +1,59 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+// Standard library.
+#include <limits>
+// Public headers of the gpufilters and imgproc modules.
+#include "opencv2/gpufilters.hpp"
+#include "opencv2/imgproc.hpp"
+// Private GPU header from the core module.
+#include "opencv2/core/gpu_private.hpp"
+// NOTE(review): presumably supplies the HAVE_OPENCV_* macros tested below - confirm.
+#include "opencv2/opencv_modules.hpp"
+// gpuarithm is optional: its header is included only when HAVE_OPENCV_GPUARITHM is defined.
+#ifdef HAVE_OPENCV_GPUARITHM
+#  include "opencv2/gpuarithm.hpp"
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpufilters/test/test_filters.cpp
similarity index 96%
rename from modules/gpu/test/test_filters.cpp
rename to modules/gpufilters/test/test_filters.cpp
index 1c0679e48..5adcd87a4 100644
--- a/modules/gpu/test/test_filters.cpp
+++ b/modules/gpufilters/test/test_filters.cpp
@@ -105,7 +105,7 @@ GPU_TEST_P(Blur, Accuracy)
     EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, Blur, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@@ -164,7 +164,7 @@ GPU_TEST_P(Sobel, Accuracy)
     EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Sobel, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),
@@ -227,7 +227,7 @@ GPU_TEST_P(Scharr, Accuracy)
     EXPECT_MAT_NEAR(getInnerROI(dst_gold, cv::Size(3, 3)), getInnerROI(dst, cv::Size(3, 3)), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Scharr, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),
@@ -301,7 +301,7 @@ GPU_TEST_P(GaussianBlur, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),
@@ -363,7 +363,7 @@ GPU_TEST_P(Laplacian, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, Laplacian, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
@@ -411,7 +411,7 @@ GPU_TEST_P(Erode, Accuracy)
     EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, Erode, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Erode, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@@ -460,7 +460,7 @@ GPU_TEST_P(Dilate, Accuracy)
     EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, Dilate, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Dilate, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@@ -513,7 +513,7 @@ GPU_TEST_P(MorphEx, Accuracy)
     EXPECT_MAT_NEAR(getInnerROI(dst_gold, border), getInnerROI(dst, border), 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, MorphEx, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, MorphEx, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@@ -565,7 +565,7 @@ GPU_TEST_P(Filter2D, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
diff --git a/modules/gpufilters/test/test_main.cpp b/modules/gpufilters/test/test_main.cpp
new file mode 100644
index 000000000..c37a85cb4
--- /dev/null
+++ b/modules/gpufilters/test/test_main.cpp
@@ -0,0 +1,120 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+using namespace cvtest;
+using namespace testing;
+
+int main(int argc, char** argv)
+{
+    try
+    {
+        const std::string keys =
+                "{ h help ?            |      | Print help}"
+                "{ i info              |      | Print information about system and exit }"
+                "{ device              | -1   | Device on which tests will be executed (-1 means all devices) }"
+                ;
+
+        CommandLineParser cmd(argc, (const char**)argv, keys);
+
+        if (cmd.has("help"))
+        {
+            cmd.printMessage();
+            return 0;
+        }
+
+        printCudaInfo();
+
+        if (cmd.has("info"))
+        {
+            return 0;
+        }
+
+        int device = cmd.get<int>("device");
+        if (device < 0)
+        {
+            DeviceManager::instance().loadAll();
+
+            cout << "Run tests on all supported devices \n" << endl;
+        }
+        else
+        {
+            DeviceManager::instance().load(device);
+
+            DeviceInfo info(device);
+            cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl;
+        }
+
+        TS::ptr()->init("gpu");
+        InitGoogleTest(&argc, argv);
+
+        return RUN_ALL_TESTS();
+    }
+    catch (const exception& e)
+    {
+        cerr << e.what() << endl;
+        return -1;
+    }
+    catch (...)
+    {
+        cerr << "Unknown error" << endl;
+        return -1;
+    }
+
+    return 0;
+}
+
+#else // HAVE_CUDA
+
+int main()
+{
+    printf("OpenCV was built without CUDA support\n");
+    return 0;
+}
+
+#endif // HAVE_CUDA
diff --git a/modules/gpufilters/test/test_precomp.cpp b/modules/gpufilters/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpufilters/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpufilters/test/test_precomp.hpp b/modules/gpufilters/test/test_precomp.hpp
new file mode 100644
index 000000000..95984929f
--- /dev/null
+++ b/modules/gpufilters/test/test_precomp.hpp
@@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpufilters.hpp"
+#include "opencv2/imgproc.hpp"
+
+#endif
diff --git a/modules/stitching/CMakeLists.txt b/modules/stitching/CMakeLists.txt
index 2282d8123..647d8b151 100644
--- a/modules/stitching/CMakeLists.txt
+++ b/modules/stitching/CMakeLists.txt
@@ -1,3 +1,3 @@
 set(the_description "Images stitching")
-ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_gpuarithm opencv_nonfree)
+ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_gpuarithm opencv_gpufilters opencv_nonfree)
 
diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt
index fae24e17e..378a2a942 100644
--- a/modules/superres/CMakeLists.txt
+++ b/modules/superres/CMakeLists.txt
@@ -4,4 +4,4 @@ endif()
 
 set(the_description "Super Resolution")
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
-ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_highgui opencv_gpu opencv_gpucodec opencv_gpuarithm)
+ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_highgui opencv_gpu opencv_gpuarithm opencv_gpufilters opencv_gpucodec)
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index 92f7dc790..4678532af 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -18,6 +18,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
 
   if(HAVE_opencv_gpu)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuarithm/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufilters/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index efff5f939..760bc26e6 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm)
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 

From bc0e563092afd062dee9d33c53a5045fcafff260 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Mon, 8 Apr 2013 15:11:48 +0400
Subject: [PATCH 04/49] CV_GPU_TEST_MAIN macros

---
 modules/gpuarithm/test/test_main.cpp       | 77 +---------------------
 modules/gpucodec/test/test_main.cpp        |  2 +-
 modules/gpufilters/test/test_main.cpp      | 77 +---------------------
 modules/ts/include/opencv2/ts/gpu_test.hpp | 64 ++++++++++++++++++
 4 files changed, 67 insertions(+), 153 deletions(-)

diff --git a/modules/gpuarithm/test/test_main.cpp b/modules/gpuarithm/test/test_main.cpp
index c37a85cb4..eea3d7c00 100644
--- a/modules/gpuarithm/test/test_main.cpp
+++ b/modules/gpuarithm/test/test_main.cpp
@@ -42,79 +42,4 @@
 
 #include "test_precomp.hpp"
 
-#ifdef HAVE_CUDA
-
-using namespace std;
-using namespace cv;
-using namespace cv::gpu;
-using namespace cvtest;
-using namespace testing;
-
-int main(int argc, char** argv)
-{
-    try
-    {
-        const std::string keys =
-                "{ h help ?            |      | Print help}"
-                "{ i info              |      | Print information about system and exit }"
-                "{ device              | -1   | Device on which tests will be executed (-1 means all devices) }"
-                ;
-
-        CommandLineParser cmd(argc, (const char**)argv, keys);
-
-        if (cmd.has("help"))
-        {
-            cmd.printMessage();
-            return 0;
-        }
-
-        printCudaInfo();
-
-        if (cmd.has("info"))
-        {
-            return 0;
-        }
-
-        int device = cmd.get<int>("device");
-        if (device < 0)
-        {
-            DeviceManager::instance().loadAll();
-
-            cout << "Run tests on all supported devices \n" << endl;
-        }
-        else
-        {
-            DeviceManager::instance().load(device);
-
-            DeviceInfo info(device);
-            cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl;
-        }
-
-        TS::ptr()->init("gpu");
-        InitGoogleTest(&argc, argv);
-
-        return RUN_ALL_TESTS();
-    }
-    catch (const exception& e)
-    {
-        cerr << e.what() << endl;
-        return -1;
-    }
-    catch (...)
-    {
-        cerr << "Unknown error" << endl;
-        return -1;
-    }
-
-    return 0;
-}
-
-#else // HAVE_CUDA
-
-int main()
-{
-    printf("OpenCV was built without CUDA support\n");
-    return 0;
-}
-
-#endif // HAVE_CUDA
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpucodec/test/test_main.cpp b/modules/gpucodec/test/test_main.cpp
index 958adfee5..eea3d7c00 100644
--- a/modules/gpucodec/test/test_main.cpp
+++ b/modules/gpucodec/test/test_main.cpp
@@ -42,4 +42,4 @@
 
 #include "test_precomp.hpp"
 
-CV_TEST_MAIN("gpu")
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpufilters/test/test_main.cpp b/modules/gpufilters/test/test_main.cpp
index c37a85cb4..eea3d7c00 100644
--- a/modules/gpufilters/test/test_main.cpp
+++ b/modules/gpufilters/test/test_main.cpp
@@ -42,79 +42,4 @@
 
 #include "test_precomp.hpp"
 
-#ifdef HAVE_CUDA
-
-using namespace std;
-using namespace cv;
-using namespace cv::gpu;
-using namespace cvtest;
-using namespace testing;
-
-int main(int argc, char** argv)
-{
-    try
-    {
-        const std::string keys =
-                "{ h help ?            |      | Print help}"
-                "{ i info              |      | Print information about system and exit }"
-                "{ device              | -1   | Device on which tests will be executed (-1 means all devices) }"
-                ;
-
-        CommandLineParser cmd(argc, (const char**)argv, keys);
-
-        if (cmd.has("help"))
-        {
-            cmd.printMessage();
-            return 0;
-        }
-
-        printCudaInfo();
-
-        if (cmd.has("info"))
-        {
-            return 0;
-        }
-
-        int device = cmd.get<int>("device");
-        if (device < 0)
-        {
-            DeviceManager::instance().loadAll();
-
-            cout << "Run tests on all supported devices \n" << endl;
-        }
-        else
-        {
-            DeviceManager::instance().load(device);
-
-            DeviceInfo info(device);
-            cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl;
-        }
-
-        TS::ptr()->init("gpu");
-        InitGoogleTest(&argc, argv);
-
-        return RUN_ALL_TESTS();
-    }
-    catch (const exception& e)
-    {
-        cerr << e.what() << endl;
-        return -1;
-    }
-    catch (...)
-    {
-        cerr << "Unknown error" << endl;
-        return -1;
-    }
-
-    return 0;
-}
-
-#else // HAVE_CUDA
-
-int main()
-{
-    printf("OpenCV was built without CUDA support\n");
-    return 0;
-}
-
-#endif // HAVE_CUDA
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/ts/include/opencv2/ts/gpu_test.hpp b/modules/ts/include/opencv2/ts/gpu_test.hpp
index 1a2caf70f..b4c6e7b74 100644
--- a/modules/ts/include/opencv2/ts/gpu_test.hpp
+++ b/modules/ts/include/opencv2/ts/gpu_test.hpp
@@ -43,6 +43,7 @@
 #ifndef __OPENCV_GPU_TEST_UTILITY_HPP__
 #define __OPENCV_GPU_TEST_UTILITY_HPP__
 
+#include <stdexcept>
 #include "opencv2/core.hpp"
 #include "opencv2/core/gpumat.hpp"
 #include "opencv2/highgui.hpp"
@@ -345,4 +346,67 @@ namespace cv { namespace gpu
     CV_EXPORTS void PrintTo(const DeviceInfo& info, std::ostream* os);
 }}
 
+#ifdef HAVE_CUDA
+// CV_GPU_TEST_MAIN(resourcesubdir): defines main() that handles the help/info/device options, calls DeviceManager loadAll() when device < 0 (else load(device)), inits cvtest::TS with resourcesubdir and returns RUN_ALL_TESTS(); exceptions are printed to std::cerr and yield -1.
+#define CV_GPU_TEST_MAIN(resourcesubdir) \
+    int main(int argc, char* argv[]) \
+    { \
+        try \
+        { \
+            cv::CommandLineParser cmd(argc, argv, \
+                "{ h help ?            |      | Print help}" \
+                "{ i info              |      | Print information about system and exit }" \
+                "{ device              | -1   | Device on which tests will be executed (-1 means all devices) }" \
+            ); \
+            if (cmd.has("help")) \
+            { \
+                cmd.printMessage(); \
+                return 0; \
+            } \
+            cvtest::printCudaInfo(); \
+            if (cmd.has("info")) \
+            { \
+                return 0; \
+            } \
+            int device = cmd.get<int>("device"); \
+            if (device < 0) \
+            { \
+                cvtest::DeviceManager::instance().loadAll(); \
+                std::cout << "Run tests on all supported devices \n" << std::endl; \
+            } \
+            else \
+            { \
+                cvtest::DeviceManager::instance().load(device); \
+                cv::gpu::DeviceInfo info(device); \
+                std::cout << "Run tests on device " << device << " [" << info.name() << "] \n" << std::endl; \
+            } \
+            cvtest::TS::ptr()->init( resourcesubdir ); \
+            testing::InitGoogleTest(&argc, argv); \
+            return RUN_ALL_TESTS(); \
+        } \
+        catch (const std::exception& e) \
+        { \
+            std::cerr << e.what() << std::endl; \
+            return -1; \
+        } \
+        catch (...) \
+        { \
+            std::cerr << "Unknown error" << std::endl; \
+            return -1; \
+        } \
+        return 0; \
+    }
+
+#else // HAVE_CUDA
+// Without HAVE_CUDA, CV_GPU_TEST_MAIN expands to a main() that only prints a notice and returns 0; the resourcesubdir argument is not used.
+#define CV_GPU_TEST_MAIN(resourcesubdir) \
+    int main() \
+    { \
+        printf("OpenCV was built without CUDA support\n"); \
+        return 0; \
+    }
+
+#endif // HAVE_CUDA
+
+
 #endif // __OPENCV_GPU_TEST_UTILITY_HPP__

From 229ca0914a914a98a657ab29daae76e9caa91222 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Mon, 8 Apr 2013 18:51:06 +0400
Subject: [PATCH 05/49] gpunvidia module for NCV & NPP API

---
 modules/gpu/CMakeLists.txt                    |  26 +-
 modules/gpu/src/cascadeclassifier.cpp         | 237 ------------------
 modules/gpu/src/cuda/internal_shared.hpp      |   8 +-
 modules/gpu/src/cuda/safe_call.hpp            |   2 +-
 modules/gpu/src/precomp.hpp                   |   5 +-
 modules/gpu/test/test_main.cpp                |  45 ++++
 modules/gpu/test/test_precomp.hpp             |   1 -
 modules/gpunvidia/CMakeLists.txt              |   9 +
 .../gpunvidia/include/opencv2/gpunvidia.hpp   |  52 ++++
 .../include/opencv2/gpunvidia}/NCV.hpp        |   0
 .../opencv2/gpunvidia}/NCVBroxOpticalFlow.hpp |   2 +-
 .../gpunvidia}/NCVHaarObjectDetection.hpp     |   2 +-
 .../include/opencv2/gpunvidia}/NCVPyramid.hpp |   2 +-
 .../opencv2/gpunvidia}/NPP_staging.hpp        |   2 +-
 .../core/NCV.cu => gpunvidia/src/NCV.cpp}     | 211 ++++------------
 modules/gpunvidia/src/cuda/NCV.cu             | 180 +++++++++++++
 .../core => gpunvidia/src/cuda}/NCVAlg.hpp    |   2 +-
 .../src/cuda}/NCVBroxOpticalFlow.cu           |   9 +-
 .../src/cuda}/NCVColorConversion.hpp          |   0
 .../src/cuda}/NCVHaarObjectDetection.cu       | 212 +++++++++++++++-
 .../src/cuda}/NCVPixelOperations.hpp          |   2 +-
 .../core => gpunvidia/src/cuda}/NCVPyramid.cu |  17 +-
 .../src/cuda}/NCVRuntimeTemplates.hpp         |   0
 .../src/cuda}/NPP_staging.cu                  |   4 +-
 modules/gpunvidia/src/precomp.cpp             |  43 ++++
 modules/gpunvidia/src/precomp.hpp             |  56 +++++
 .../test}/NCVAutoTestLister.hpp               |   8 +-
 .../nvidia => gpunvidia/test}/NCVTest.hpp     |   3 +-
 .../test}/NCVTestSourceProvider.hpp           |   4 +-
 .../nvidia => gpunvidia/test}/TestCompact.cpp |   7 +-
 .../nvidia => gpunvidia/test}/TestCompact.h   |   0
 .../test}/TestDrawRects.cpp                   |   7 +-
 .../nvidia => gpunvidia/test}/TestDrawRects.h |   0
 .../test}/TestHaarCascadeApplication.cpp      |  14 +-
 .../test}/TestHaarCascadeApplication.h        |   0
 .../test}/TestHaarCascadeLoader.cpp           |   7 +-
 .../test}/TestHaarCascadeLoader.h             |   0
 .../test}/TestHypothesesFilter.cpp            |   7 +-
 .../test}/TestHypothesesFilter.h              |   0
 .../test}/TestHypothesesGrow.cpp              |   7 +-
 .../test}/TestHypothesesGrow.h                |   0
 .../test}/TestIntegralImage.cpp               |   7 +-
 .../test}/TestIntegralImage.h                 |   0
 .../test}/TestIntegralImageSquared.cpp        |   6 +-
 .../test}/TestIntegralImageSquared.h          |   0
 .../test}/TestRectStdDev.cpp                  |   8 +-
 .../test}/TestRectStdDev.h                    |   0
 .../nvidia => gpunvidia/test}/TestResize.cpp  |   8 +-
 .../nvidia => gpunvidia/test}/TestResize.h    |   0
 .../test}/TestTranspose.cpp                   |   8 +-
 .../nvidia => gpunvidia/test}/TestTranspose.h |   0
 .../nvidia => gpunvidia/test}/main_nvidia.cpp |  29 +--
 .../test/main_test_nvidia.h                   |   0
 .../main.cpp => gpunvidia/test/test_main.cpp} |   0
 .../{gpu => gpunvidia}/test/test_nvidia.cpp   |   0
 modules/gpunvidia/test/test_precomp.cpp       |  43 ++++
 modules/gpunvidia/test/test_precomp.hpp       |  95 +++++++
 samples/gpu/CMakeLists.txt                    |   2 +-
 samples/gpu/cascadeclassifier_nvidia_api.cpp  |   2 +-
 samples/gpu/opticalflow_nvidia_api.cpp        |   3 +-
 60 files changed, 831 insertions(+), 573 deletions(-)
 create mode 100644 modules/gpu/test/test_main.cpp
 create mode 100644 modules/gpunvidia/CMakeLists.txt
 create mode 100644 modules/gpunvidia/include/opencv2/gpunvidia.hpp
 rename modules/{gpu/src/nvidia/core => gpunvidia/include/opencv2/gpunvidia}/NCV.hpp (100%)
 rename modules/{gpu/src/nvidia => gpunvidia/include/opencv2/gpunvidia}/NCVBroxOpticalFlow.hpp (99%)
 rename modules/{gpu/src/nvidia => gpunvidia/include/opencv2/gpunvidia}/NCVHaarObjectDetection.hpp (99%)
 rename modules/{gpu/src/nvidia/core => gpunvidia/include/opencv2/gpunvidia}/NCVPyramid.hpp (98%)
 rename modules/{gpu/src/nvidia/NPP_staging => gpunvidia/include/opencv2/gpunvidia}/NPP_staging.hpp (99%)
 rename modules/{gpu/src/nvidia/core/NCV.cu => gpunvidia/src/NCV.cpp} (82%)
 create mode 100644 modules/gpunvidia/src/cuda/NCV.cu
 rename modules/{gpu/src/nvidia/core => gpunvidia/src/cuda}/NCVAlg.hpp (99%)
 rename modules/{gpu/src/nvidia => gpunvidia/src/cuda}/NCVBroxOpticalFlow.cu (99%)
 rename modules/{gpu/src/nvidia/core => gpunvidia/src/cuda}/NCVColorConversion.hpp (100%)
 rename modules/{gpu/src/nvidia => gpunvidia/src/cuda}/NCVHaarObjectDetection.cu (92%)
 rename modules/{gpu/src/nvidia/core => gpunvidia/src/cuda}/NCVPixelOperations.hpp (99%)
 rename modules/{gpu/src/nvidia/core => gpunvidia/src/cuda}/NCVPyramid.cu (99%)
 rename modules/{gpu/src/nvidia/core => gpunvidia/src/cuda}/NCVRuntimeTemplates.hpp (100%)
 rename modules/{gpu/src/nvidia/NPP_staging => gpunvidia/src/cuda}/NPP_staging.cu (99%)
 create mode 100644 modules/gpunvidia/src/precomp.cpp
 create mode 100644 modules/gpunvidia/src/precomp.hpp
 rename modules/{gpu/test/nvidia => gpunvidia/test}/NCVAutoTestLister.hpp (97%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/NCVTest.hpp (99%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/NCVTestSourceProvider.hpp (99%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestCompact.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestCompact.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestDrawRects.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestDrawRects.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestHaarCascadeApplication.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestHaarCascadeApplication.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestHaarCascadeLoader.cpp (97%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestHaarCascadeLoader.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestHypothesesFilter.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestHypothesesFilter.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestHypothesesGrow.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestHypothesesGrow.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestIntegralImage.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestIntegralImage.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestIntegralImageSquared.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestIntegralImageSquared.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestRectStdDev.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestRectStdDev.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestResize.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestResize.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestTranspose.cpp (98%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/TestTranspose.h (100%)
 rename modules/{gpu/test/nvidia => gpunvidia/test}/main_nvidia.cpp (96%)
 rename modules/{gpu => gpunvidia}/test/main_test_nvidia.h (100%)
 rename modules/{gpu/test/main.cpp => gpunvidia/test/test_main.cpp} (100%)
 rename modules/{gpu => gpunvidia}/test/test_nvidia.cpp (100%)
 create mode 100644 modules/gpunvidia/test/test_precomp.cpp
 create mode 100644 modules/gpunvidia/test/test_precomp.hpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 1ed980694..ecad9e4dd 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -3,7 +3,8 @@ if(ANDROID OR IOS)
 endif()
 
 set(the_description "GPU-accelerated Computer Vision")
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters)
+
+ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters OPTIONAL opencv_gpunvidia)
 
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
 
@@ -18,12 +19,7 @@ source_group("Src\\Host"      FILES ${lib_srcs} ${lib_int_hdrs})
 source_group("Src\\Cuda"      FILES ${lib_cuda} ${lib_cuda_hdrs})
 
 if(HAVE_CUDA)
-  file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp" "src/nvidia/*.h*")
-  file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
-  set(ncv_files ${ncv_srcs} ${ncv_cuda})
-
-  source_group("Src\\NVidia" FILES ${ncv_files})
-  ocv_include_directories("src/nvidia" "src/nvidia/core" "src/nvidia/NPP_staging" ${CUDA_INCLUDE_DIRS})
+  ocv_include_directories(${CUDA_INCLUDE_DIRS})
   ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter /wd4211 /wd4201 /wd4100 /wd4505 /wd4408)
 
   if(MSVC)
@@ -43,12 +39,11 @@ else()
   set(lib_cuda "")
   set(cuda_objs "")
   set(cuda_link_libs "")
-  set(ncv_files "")
 endif()
 
 ocv_set_module_sources(
   HEADERS ${lib_hdrs}
-  SOURCES ${lib_int_hdrs} ${lib_cuda_hdrs} ${lib_srcs} ${lib_cuda} ${ncv_files} ${cuda_objs}
+  SOURCES ${lib_int_hdrs} ${lib_cuda_hdrs} ${lib_srcs} ${lib_cuda} ${cuda_objs}
   )
 
 ocv_create_module(${cuda_link_libs})
@@ -57,10 +52,6 @@ if(HAVE_CUDA)
   if(HAVE_CUFFT)
     CUDA_ADD_CUFFT_TO_TARGET(${the_module})
   endif()
-
-  install(FILES src/nvidia/NPP_staging/NPP_staging.hpp  src/nvidia/core/NCV.hpp
-    DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2/${name}
-    COMPONENT main)
 endif()
 
 ocv_add_precompiled_headers(${the_module})
@@ -71,15 +62,8 @@ ocv_add_precompiled_headers(${the_module})
 file(GLOB test_srcs "test/*.cpp")
 file(GLOB test_hdrs "test/*.hpp" "test/*.h")
 
-set(nvidia "")
-if(HAVE_CUDA)
-  file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.hpp" "test/nvidia/*.h")
-  set(nvidia FILES "Src\\\\\\\\NVidia" ${nvidia}) # 8 ugly backslashes :'(
-endif()
-
 ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
-                       FILES "Src" ${test_srcs}
-                       ${nvidia})
+                       FILES "Src" ${test_srcs})
 ocv_add_perf_tests()
 
 if(HAVE_CUDA)
diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp
index c7514bb21..0b9f9aafc 100644
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -722,240 +722,3 @@ bool cv::gpu::CascadeClassifier_GPU::load(const String& filename)
 }
 
 #endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#if defined (HAVE_CUDA)
-
-struct RectConvert
-{
-    Rect operator()(const NcvRect32u& nr) const { return Rect(nr.x, nr.y, nr.width, nr.height); }
-    NcvRect32u operator()(const Rect& nr) const
-    {
-        NcvRect32u rect;
-        rect.x = nr.x;
-        rect.y = nr.y;
-        rect.width = nr.width;
-        rect.height = nr.height;
-        return rect;
-    }
-};
-
-void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights)
-{
-    std::vector<Rect> rects(hypotheses.size());
-    std::transform(hypotheses.begin(), hypotheses.end(), rects.begin(), RectConvert());
-
-    if (weights)
-    {
-        std::vector<int> weights_int;
-        weights_int.assign(weights->begin(), weights->end());
-        cv::groupRectangles(rects, weights_int, groupThreshold, eps);
-    }
-    else
-    {
-        cv::groupRectangles(rects, groupThreshold, eps);
-    }
-    std::transform(rects.begin(), rects.end(), hypotheses.begin(), RectConvert());
-    hypotheses.resize(rects.size());
-}
-
-NCVStatus loadFromXML(const String &filename,
-                      HaarClassifierCascadeDescriptor &haar,
-                      std::vector<HaarStage64> &haarStages,
-                      std::vector<HaarClassifierNode128> &haarClassifierNodes,
-                      std::vector<HaarFeature64> &haarFeatures)
-{
-    NCVStatus ncvStat;
-
-    haar.NumStages = 0;
-    haar.NumClassifierRootNodes = 0;
-    haar.NumClassifierTotalNodes = 0;
-    haar.NumFeatures = 0;
-    haar.ClassifierSize.width = 0;
-    haar.ClassifierSize.height = 0;
-    haar.bHasStumpsOnly = true;
-    haar.bNeedsTiltedII = false;
-    Ncv32u curMaxTreeDepth;
-
-    std::vector<char> xmlFileCont;
-
-    std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
-    haarStages.resize(0);
-    haarClassifierNodes.resize(0);
-    haarFeatures.resize(0);
-
-    Ptr<CvHaarClassifierCascade> oldCascade = (CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0);
-    if (oldCascade.empty())
-    {
-        return NCV_HAAR_XML_LOADING_EXCEPTION;
-    }
-
-    haar.ClassifierSize.width = oldCascade->orig_window_size.width;
-    haar.ClassifierSize.height = oldCascade->orig_window_size.height;
-
-    int stagesCound = oldCascade->count;
-    for(int s = 0; s < stagesCound; ++s) // by stages
-    {
-        HaarStage64 curStage;
-        curStage.setStartClassifierRootNodeOffset(static_cast<Ncv32u>(haarClassifierNodes.size()));
-
-        curStage.setStageThreshold(oldCascade->stage_classifier[s].threshold);
-
-        int treesCount = oldCascade->stage_classifier[s].count;
-        for(int t = 0; t < treesCount; ++t) // by trees
-        {
-            Ncv32u nodeId = 0;
-            CvHaarClassifier* tree = &oldCascade->stage_classifier[s].classifier[t];
-
-            int nodesCount = tree->count;
-            for(int n = 0; n < nodesCount; ++n)  //by features
-            {
-                CvHaarFeature* feature = &tree->haar_feature[n];
-
-                HaarClassifierNode128 curNode;
-                curNode.setThreshold(tree->threshold[n]);
-
-                NcvBool bIsLeftNodeLeaf = false;
-                NcvBool bIsRightNodeLeaf = false;
-
-                HaarClassifierNodeDescriptor32 nodeLeft;
-                if ( tree->left[n] <= 0 )
-                {
-                    Ncv32f leftVal = tree->alpha[-tree->left[n]];
-                    ncvStat = nodeLeft.create(leftVal);
-                    ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
-                    bIsLeftNodeLeaf = true;
-                }
-                else
-                {
-                    Ncv32u leftNodeOffset = tree->left[n];
-                    nodeLeft.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + leftNodeOffset - 1));
-                    haar.bHasStumpsOnly = false;
-                }
-                curNode.setLeftNodeDesc(nodeLeft);
-
-                HaarClassifierNodeDescriptor32 nodeRight;
-                if ( tree->right[n] <= 0 )
-                {
-                    Ncv32f rightVal = tree->alpha[-tree->right[n]];
-                    ncvStat = nodeRight.create(rightVal);
-                    ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
-                    bIsRightNodeLeaf = true;
-                }
-                else
-                {
-                    Ncv32u rightNodeOffset = tree->right[n];
-                    nodeRight.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + rightNodeOffset - 1));
-                    haar.bHasStumpsOnly = false;
-                }
-                curNode.setRightNodeDesc(nodeRight);
-
-                Ncv32u tiltedVal = feature->tilted;
-                haar.bNeedsTiltedII = (tiltedVal != 0);
-
-                Ncv32u featureId = 0;
-                for(int l = 0; l < CV_HAAR_FEATURE_MAX; ++l) //by rects
-                {
-                    Ncv32u rectX = feature->rect[l].r.x;
-                    Ncv32u rectY = feature->rect[l].r.y;
-                    Ncv32u rectWidth = feature->rect[l].r.width;
-                    Ncv32u rectHeight = feature->rect[l].r.height;
-
-                    Ncv32f rectWeight = feature->rect[l].weight;
-
-                    if (rectWeight == 0/* && rectX == 0 &&rectY == 0 && rectWidth == 0 && rectHeight == 0*/)
-                        break;
-
-                    HaarFeature64 curFeature;
-                    ncvStat = curFeature.setRect(rectX, rectY, rectWidth, rectHeight, haar.ClassifierSize.width, haar.ClassifierSize.height);
-                    curFeature.setWeight(rectWeight);
-                    ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
-                    haarFeatures.push_back(curFeature);
-
-                    featureId++;
-                }
-
-                HaarFeatureDescriptor32 tmpFeatureDesc;
-                ncvStat = tmpFeatureDesc.create(haar.bNeedsTiltedII, bIsLeftNodeLeaf, bIsRightNodeLeaf,
-                    featureId, static_cast<Ncv32u>(haarFeatures.size()) - featureId);
-                ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
-                curNode.setFeatureDesc(tmpFeatureDesc);
-
-                if (!nodeId)
-                {
-                    //root node
-                    haarClassifierNodes.push_back(curNode);
-                    curMaxTreeDepth = 1;
-                }
-                else
-                {
-                    //other node
-                    h_TmpClassifierNotRootNodes.push_back(curNode);
-                    curMaxTreeDepth++;
-                }
-
-                nodeId++;
-            }
-        }
-
-        curStage.setNumClassifierRootNodes(treesCount);
-        haarStages.push_back(curStage);
-    }
-
-    //fill in cascade stats
-    haar.NumStages = static_cast<Ncv32u>(haarStages.size());
-    haar.NumClassifierRootNodes = static_cast<Ncv32u>(haarClassifierNodes.size());
-    haar.NumClassifierTotalNodes = static_cast<Ncv32u>(haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size());
-    haar.NumFeatures = static_cast<Ncv32u>(haarFeatures.size());
-
-    //merge root and leaf nodes in one classifiers array
-    Ncv32u offsetRoot = static_cast<Ncv32u>(haarClassifierNodes.size());
-    for (Ncv32u i=0; i<haarClassifierNodes.size(); i++)
-    {
-        HaarFeatureDescriptor32 featureDesc = haarClassifierNodes[i].getFeatureDesc();
-
-        HaarClassifierNodeDescriptor32 nodeLeft = haarClassifierNodes[i].getLeftNodeDesc();
-        if (!featureDesc.isLeftNodeLeaf())
-        {
-            Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
-            nodeLeft.create(newOffset);
-        }
-        haarClassifierNodes[i].setLeftNodeDesc(nodeLeft);
-
-        HaarClassifierNodeDescriptor32 nodeRight = haarClassifierNodes[i].getRightNodeDesc();
-        if (!featureDesc.isRightNodeLeaf())
-        {
-            Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
-            nodeRight.create(newOffset);
-        }
-        haarClassifierNodes[i].setRightNodeDesc(nodeRight);
-    }
-
-    for (Ncv32u i=0; i<h_TmpClassifierNotRootNodes.size(); i++)
-    {
-        HaarFeatureDescriptor32 featureDesc = h_TmpClassifierNotRootNodes[i].getFeatureDesc();
-
-        HaarClassifierNodeDescriptor32 nodeLeft = h_TmpClassifierNotRootNodes[i].getLeftNodeDesc();
-        if (!featureDesc.isLeftNodeLeaf())
-        {
-            Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
-            nodeLeft.create(newOffset);
-        }
-        h_TmpClassifierNotRootNodes[i].setLeftNodeDesc(nodeLeft);
-
-        HaarClassifierNodeDescriptor32 nodeRight = h_TmpClassifierNotRootNodes[i].getRightNodeDesc();
-        if (!featureDesc.isRightNodeLeaf())
-        {
-            Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
-            nodeRight.create(newOffset);
-        }
-        h_TmpClassifierNotRootNodes[i].setRightNodeDesc(nodeRight);
-
-        haarClassifierNodes.push_back(h_TmpClassifierNotRootNodes[i]);
-    }
-
-    return NCV_SUCCESS;
-}
-
-#endif /* HAVE_CUDA */
diff --git a/modules/gpu/src/cuda/internal_shared.hpp b/modules/gpu/src/cuda/internal_shared.hpp
index 4362451f8..c8d4e5b7f 100644
--- a/modules/gpu/src/cuda/internal_shared.hpp
+++ b/modules/gpu/src/cuda/internal_shared.hpp
@@ -45,10 +45,12 @@
 
 #include <cuda_runtime.h>
 #include <npp.h>
-#include "NPP_staging.hpp"
-#include "opencv2/gpu/devmem2d.hpp"
-#include "safe_call.hpp"
+
+#include "opencv2/core/cuda_devptrs.hpp"
 #include "opencv2/core/cuda/common.hpp"
+#include "opencv2/gpunvidia.hpp"
+
+#include "safe_call.hpp"
 
 namespace cv { namespace gpu
 {
diff --git a/modules/gpu/src/cuda/safe_call.hpp b/modules/gpu/src/cuda/safe_call.hpp
index 10b72e623..fa62fb15a 100644
--- a/modules/gpu/src/cuda/safe_call.hpp
+++ b/modules/gpu/src/cuda/safe_call.hpp
@@ -45,7 +45,7 @@
 
 #include <cuda_runtime_api.h>
 #include <cufft.h>
-#include "NCV.hpp"
+#include "opencv2/gpunvidia.hpp"
 
 #if defined(__GNUC__)
     #define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index 69ddeaed9..0127bd28e 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -79,10 +79,7 @@
     #include "internal_shared.hpp"
     #include "opencv2/core/stream_accessor.hpp"
 
-    #include "nvidia/core/NCV.hpp"
-    #include "nvidia/NPP_staging/NPP_staging.hpp"
-    #include "nvidia/NCVHaarObjectDetection.hpp"
-    #include "nvidia/NCVBroxOpticalFlow.hpp"
+    #include "opencv2/gpunvidia.hpp"
 #endif /* defined(HAVE_CUDA) */
 
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/test/test_main.cpp b/modules/gpu/test/test_main.cpp
new file mode 100644
index 000000000..eea3d7c00
--- /dev/null
+++ b/modules/gpu/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp
index f6d1392c7..08807d51e 100644
--- a/modules/gpu/test/test_precomp.hpp
+++ b/modules/gpu/test/test_precomp.hpp
@@ -75,7 +75,6 @@
 #include "opencv2/gpu.hpp"
 
 #include "interpolation.hpp"
-#include "main_test_nvidia.h"
 
 #include "opencv2/core/gpu_private.hpp"
 
diff --git a/modules/gpunvidia/CMakeLists.txt b/modules/gpunvidia/CMakeLists.txt
new file mode 100644
index 000000000..7c1542430
--- /dev/null
+++ b/modules/gpunvidia/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(NOT HAVE_CUDA)  # gpunvidia is only built when CUDA is available
+  ocv_module_disable(gpunvidia)  # NOTE(review): presumably also stops processing of this file - confirm against the macro definition
+endif()
+
+set(the_description "GPU-accelerated Computer Vision (HAL module)")  # presumably picked up by ocv_define_module below - TODO confirm
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)  # switch these two warnings off in CMAKE_CXX_FLAGS for this module
+
+ocv_define_module(gpunvidia opencv_core opencv_objdetect)  # declare the module with opencv_core and opencv_objdetect as its dependencies
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia.hpp b/modules/gpunvidia/include/opencv2/gpunvidia.hpp
new file mode 100644
index 000000000..c59dc6402
--- /dev/null
+++ b/modules/gpunvidia/include/opencv2/gpunvidia.hpp
@@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUNVIDIA_HPP__
+#define __OPENCV_GPUNVIDIA_HPP__
+
+#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpunvidia/NPP_staging.hpp"
+#include "opencv2/gpunvidia/NCVPyramid.hpp"
+#include "opencv2/gpunvidia/NCVBroxOpticalFlow.hpp"
+#include "opencv2/gpunvidia/NCVHaarObjectDetection.hpp"
+
+#endif /* __OPENCV_GPUNVIDIA_HPP__ */
diff --git a/modules/gpu/src/nvidia/core/NCV.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCV.hpp
similarity index 100%
rename from modules/gpu/src/nvidia/core/NCV.hpp
rename to modules/gpunvidia/include/opencv2/gpunvidia/NCV.hpp
diff --git a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
similarity index 99%
rename from modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp
rename to modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
index 139f7bea8..3300e1006 100644
--- a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
@@ -60,7 +60,7 @@
 #ifndef _ncv_optical_flow_h_
 #define _ncv_optical_flow_h_
 
-#include "NCV.hpp"
+#include "opencv2/gpunvidia/NCV.hpp"
 
 /// \brief Model and solver parameters
 struct NCVBroxOpticalFlowDescriptor
diff --git a/modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp
similarity index 99%
rename from modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp
rename to modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp
index bbda61fec..c067a9135 100644
--- a/modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp
@@ -59,7 +59,7 @@
 #ifndef _ncvhaarobjectdetection_hpp_
 #define _ncvhaarobjectdetection_hpp_
 
-#include "NCV.hpp"
+#include "opencv2/gpunvidia/NCV.hpp"
 
 
 //==============================================================================
diff --git a/modules/gpu/src/nvidia/core/NCVPyramid.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp
similarity index 98%
rename from modules/gpu/src/nvidia/core/NCVPyramid.hpp
rename to modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp
index 183428afa..c88dbc271 100644
--- a/modules/gpu/src/nvidia/core/NCVPyramid.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp
@@ -45,7 +45,7 @@
 
 #include <memory>
 #include <vector>
-#include "NCV.hpp"
+#include "opencv2/gpunvidia/NCV.hpp"
 
 #if 0 //def _WIN32
 
diff --git a/modules/gpu/src/nvidia/NPP_staging/NPP_staging.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp
similarity index 99%
rename from modules/gpu/src/nvidia/NPP_staging/NPP_staging.hpp
rename to modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp
index 073448e17..823be6943 100644
--- a/modules/gpu/src/nvidia/NPP_staging/NPP_staging.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp
@@ -43,7 +43,7 @@
 #ifndef _npp_staging_hpp_
 #define _npp_staging_hpp_
 
-#include "NCV.hpp"
+#include "opencv2/gpunvidia/NCV.hpp"
 
 
 /**
diff --git a/modules/gpu/src/nvidia/core/NCV.cu b/modules/gpunvidia/src/NCV.cpp
similarity index 82%
rename from modules/gpu/src/nvidia/core/NCV.cu
rename to modules/gpunvidia/src/NCV.cpp
index 718b4faab..979276c7c 100644
--- a/modules/gpu/src/nvidia/core/NCV.cu
+++ b/modules/gpunvidia/src/NCV.cpp
@@ -40,10 +40,7 @@
 //
 //M*/
 
-#include <iostream>
-#include <vector>
-#include "NCV.hpp"
-
+#include "precomp.hpp"
 
 //==============================================================================
 //
@@ -72,8 +69,6 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
     debugOutputHandler = func;
 }
 
-#if !defined CUDA_DISABLER
-
 
 //==============================================================================
 //
@@ -251,15 +246,14 @@ NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
 //===================================================================
 
 
-NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment_)
-    :
-    currentSize(0),
-    _maxSize(0),
+NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment_) :
+    _memType(NCVMemoryTypeNone),
+    _alignment(alignment_),
     allocBegin(NULL),
     begin(NULL),
     end(NULL),
-    _memType(NCVMemoryTypeNone),
-    _alignment(alignment_),
+    currentSize(0),
+    _maxSize(0),
     bReusesMemory(false)
 {
     NcvBool bProperAlignment = (alignment_ & (alignment_ - 1)) == 0;
@@ -267,13 +261,12 @@ NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment_)
 }
 
 
-NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment_, void *reusePtr)
-    :
-    currentSize(0),
-    _maxSize(0),
-    allocBegin(NULL),
+NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment_, void *reusePtr) :
     _memType(memT),
-    _alignment(alignment_)
+    _alignment(alignment_),
+    allocBegin(NULL),
+    currentSize(0),
+    _maxSize(0)
 {
     NcvBool bProperAlignment = (alignment_ & (alignment_ - 1)) == 0;
     ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2");
@@ -389,7 +382,7 @@ NCVStatus NCVMemStackAllocator::dealloc(NCVMemSegment &seg)
 
 NcvBool NCVMemStackAllocator::isInitialized(void) const
 {
-    return ((this->_alignment & (this->_alignment-1)) == 0) && isCounting() || this->allocBegin != NULL;
+    return (((this->_alignment & (this->_alignment-1)) == 0) && isCounting()) || this->allocBegin != NULL;
 }
 
 
@@ -424,12 +417,11 @@ size_t NCVMemStackAllocator::maxSize(void) const
 //===================================================================
 
 
-NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment_)
-    :
-    currentSize(0),
-    _maxSize(0),
+NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment_) :
     _memType(memT),
-    _alignment(alignment_)
+    _alignment(alignment_),
+    currentSize(0),
+    _maxSize(0)
 {
     ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
 }
@@ -649,9 +641,56 @@ double ncvEndQueryTimerMs(NcvTimer t)
 //
 //===================================================================
 
+// Functor converting between NCV rectangles (NcvRect32u) and cv::Rect by
+// copying the x, y, width and height fields one by one.
+struct RectConvert
+{
+    cv::Rect operator()(const NcvRect32u& nr) const { return cv::Rect(nr.x, nr.y, nr.width, nr.height); }
+    NcvRect32u operator()(const cv::Rect& nr) const
+    {
+        NcvRect32u rect;
+        rect.x = nr.x;
+        rect.y = nr.y;
+        rect.width = nr.width;
+        rect.height = nr.height;
+        return rect;
+    }
+};
+
+// Clusters the rectangles in `hypotheses` with cv::groupRectangles and stores
+// the grouped result back into `hypotheses`, shrinking it to the new size.
+// If `weights` is non-NULL its contents are handed to the weighted overload
+// as ints, and the weights that overload produces are written back to
+// `*weights`; otherwise the unweighted overload is used.
+static void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights)
+{
+    std::vector<cv::Rect> rects(hypotheses.size());
+    std::transform(hypotheses.begin(), hypotheses.end(), rects.begin(), RectConvert());
+
+    if (weights)
+    {
+        std::vector<int> weights_int;
+        weights_int.assign(weights->begin(), weights->end());
+        cv::groupRectangles(rects, weights_int, groupThreshold, eps);
+        // The weighted overload rewrites weights_int; copy it back so the
+        // caller sees one weight per grouped rectangle.
+        weights->assign(weights_int.begin(), weights_int.end());
+    }
+    else
+    {
+        cv::groupRectangles(rects, groupThreshold, eps);
+    }
+    // Grouping never yields more rectangles than it was given, so the
+    // converted results fit in the existing storage before the resize.
+    std::transform(rects.begin(), rects.end(), hypotheses.begin(), RectConvert());
+    hypotheses.resize(rects.size());
+}
+
+
+//===================================================================
+//
+// Operations with rectangles
+//
+//===================================================================
 
-//from OpenCV
-void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights);
 
 
 NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses,
@@ -776,133 +805,3 @@ NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
 {
     return drawRectsWrapperHost(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
 }
-
-
-const Ncv32u NUMTHREADS_DRAWRECTS = 32;
-const Ncv32u NUMTHREADS_DRAWRECTS_LOG2 = 5;
-
-
-template <class T>
-__global__ void drawRects(T *d_dst,
-                          Ncv32u dstStride,
-                          Ncv32u dstWidth,
-                          Ncv32u dstHeight,
-                          NcvRect32u *d_rects,
-                          Ncv32u numRects,
-                          T color)
-{
-    Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;
-    if (blockId > numRects * 4)
-    {
-        return;
-    }
-
-    NcvRect32u curRect = d_rects[blockId >> 2];
-    NcvBool bVertical = blockId & 0x1;
-    NcvBool bTopLeft = blockId & 0x2;
-
-    Ncv32u pt0x, pt0y;
-    if (bVertical)
-    {
-        Ncv32u numChunks = (curRect.height + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;
-
-        pt0x = bTopLeft ? curRect.x : curRect.x + curRect.width - 1;
-        pt0y = curRect.y;
-
-        if (pt0x < dstWidth)
-        {
-            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
-            {
-                Ncv32u ptY = pt0y + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
-                if (ptY < pt0y + curRect.height && ptY < dstHeight)
-                {
-                    d_dst[ptY * dstStride + pt0x] = color;
-                }
-            }
-        }
-    }
-    else
-    {
-        Ncv32u numChunks = (curRect.width + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;
-
-        pt0x = curRect.x;
-        pt0y = bTopLeft ? curRect.y : curRect.y + curRect.height - 1;
-
-        if (pt0y < dstHeight)
-        {
-            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
-            {
-                Ncv32u ptX = pt0x + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
-                if (ptX < pt0x + curRect.width && ptX < dstWidth)
-                {
-                    d_dst[pt0y * dstStride + ptX] = color;
-                }
-            }
-        }
-    }
-}
-
-
-template <class T>
-static NCVStatus drawRectsWrapperDevice(T *d_dst,
-                                        Ncv32u dstStride,
-                                        Ncv32u dstWidth,
-                                        Ncv32u dstHeight,
-                                        NcvRect32u *d_rects,
-                                        Ncv32u numRects,
-                                        T color,
-                                        cudaStream_t cuStream)
-{
-    (void)cuStream;
-    ncvAssertReturn(d_dst != NULL && d_rects != NULL, NCV_NULL_PTR);
-    ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
-    ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
-    ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);
-
-    if (numRects == 0)
-    {
-        return NCV_SUCCESS;
-    }
-
-    dim3 grid(numRects * 4);
-    dim3 block(NUMTHREADS_DRAWRECTS);
-    if (grid.x > 65535)
-    {
-        grid.y = (grid.x + 65534) / 65535;
-        grid.x = 65535;
-    }
-
-    drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);
-
-    ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
-
-    return NCV_SUCCESS;
-}
-
-
-NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
-                                 Ncv32u dstStride,
-                                 Ncv32u dstWidth,
-                                 Ncv32u dstHeight,
-                                 NcvRect32u *d_rects,
-                                 Ncv32u numRects,
-                                 Ncv8u color,
-                                 cudaStream_t cuStream)
-{
-    return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
-}
-
-
-NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
-                                  Ncv32u dstStride,
-                                  Ncv32u dstWidth,
-                                  Ncv32u dstHeight,
-                                  NcvRect32u *d_rects,
-                                  Ncv32u numRects,
-                                  Ncv32u color,
-                                  cudaStream_t cuStream)
-{
-    return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
-}
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpunvidia/src/cuda/NCV.cu b/modules/gpunvidia/src/cuda/NCV.cu
new file mode 100644
index 000000000..0e5b50e9a
--- /dev/null
+++ b/modules/gpunvidia/src/cuda/NCV.cu
@@ -0,0 +1,180 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include <iostream>
+#include <vector>
+
+#include "opencv2/gpunvidia/NCV.hpp"
+
+//===================================================================
+//
+// Operations with rectangles
+//
+//===================================================================
+
+
+const Ncv32u NUMTHREADS_DRAWRECTS = 32; // threads per block used for the drawRects launch; also the per-chunk pixel count
+const Ncv32u NUMTHREADS_DRAWRECTS_LOG2 = 5; // log2(NUMTHREADS_DRAWRECTS); used as a shift count when computing chunk counts
+
+// Kernel: each block draws one edge of one rectangle (4 blocks per rect); threads cover the edge in NUMTHREADS_DRAWRECTS-pixel chunks.
+template <class T>
+__global__ void drawRects(T *d_dst,
+                          Ncv32u dstStride,
+                          Ncv32u dstWidth,
+                          Ncv32u dstHeight,
+                          NcvRect32u *d_rects,
+                          Ncv32u numRects,
+                          T color)
+{
+    Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x; // linear block index; the host caps grid.x at 65535 and spills into grid.y
+    if (blockId >= numRects * 4) // valid ids are [0, numRects*4); with '>' a surplus block read d_rects[numRects] out of bounds
+    {
+        return;
+    }
+
+    NcvRect32u curRect = d_rects[blockId >> 2]; // four consecutive block ids share one rectangle
+    NcvBool bVertical = blockId & 0x1; // bit 0: vertical edge vs. horizontal edge
+    NcvBool bTopLeft = blockId & 0x2; // bit 1: left/top edge vs. right/bottom edge
+
+    Ncv32u pt0x, pt0y;
+    if (bVertical)
+    {
+        Ncv32u numChunks = (curRect.height + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2; // ceil(height / threads)
+
+        pt0x = bTopLeft ? curRect.x : curRect.x + curRect.width - 1;
+        pt0y = curRect.y;
+
+        if (pt0x < dstWidth) // skip the whole edge if its column lies outside the image
+        {
+            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
+            {
+                Ncv32u ptY = pt0y + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
+                if (ptY < pt0y + curRect.height && ptY < dstHeight) // clip to the rect and to the image
+                {
+                    d_dst[ptY * dstStride + pt0x] = color;
+                }
+            }
+        }
+    }
+    else
+    {
+        Ncv32u numChunks = (curRect.width + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2; // ceil(width / threads)
+
+        pt0x = curRect.x;
+        pt0y = bTopLeft ? curRect.y : curRect.y + curRect.height - 1;
+
+        if (pt0y < dstHeight) // skip the whole edge if its row lies outside the image
+        {
+            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
+            {
+                Ncv32u ptX = pt0x + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
+                if (ptX < pt0x + curRect.width && ptX < dstWidth) // clip to the rect and to the image
+                {
+                    d_dst[pt0y * dstStride + ptX] = color;
+                }
+            }
+        }
+    }
+}
+
+// Host-side launcher: validates the arguments, sizes the grid at 4 blocks per rectangle and launches drawRects<T>.
+template <class T>
+static NCVStatus drawRectsWrapperDevice(T *d_dst,
+                                        Ncv32u dstStride,
+                                        Ncv32u dstWidth,
+                                        Ncv32u dstHeight,
+                                        NcvRect32u *d_rects,
+                                        Ncv32u numRects,
+                                        T color,
+                                        cudaStream_t cuStream)
+{
+    (void)cuStream; // NOTE(review): the stream is unused; the kernel below is launched without a stream argument
+    ncvAssertReturn(d_dst != NULL && d_rects != NULL, NCV_NULL_PTR);
+    ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
+    ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
+    ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);
+
+    if (numRects == 0) // nothing to draw; avoid launching an empty grid
+    {
+        return NCV_SUCCESS;
+    }
+
+    dim3 grid(numRects * 4); // one block per rectangle edge
+    dim3 block(NUMTHREADS_DRAWRECTS);
+    if (grid.x > 65535) // too many blocks for grid.x alone: spread them over grid.y
+    {
+        grid.y = (grid.x + 65534) / 65535; // ceil(blocks / 65535); may over-allocate, the kernel bounds-checks its block id
+        grid.x = 65535;
+    }
+
+    drawRects<T><<<grid, block>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);
+
+    ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);
+
+    return NCV_SUCCESS;
+}
+
+// Draws rectangle outlines into an Ncv8u image; forwards every argument to drawRectsWrapperDevice (T deduced as Ncv8u).
+NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
+                                 Ncv32u dstStride,
+                                 Ncv32u dstWidth,
+                                 Ncv32u dstHeight,
+                                 NcvRect32u *d_rects,
+                                 Ncv32u numRects,
+                                 Ncv8u color,
+                                 cudaStream_t cuStream)
+{
+    return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
+}
+
+// Draws rectangle outlines into an Ncv32u image; forwards every argument to drawRectsWrapperDevice (T deduced as Ncv32u).
+NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
+                                  Ncv32u dstStride,
+                                  Ncv32u dstWidth,
+                                  Ncv32u dstHeight,
+                                  NcvRect32u *d_rects,
+                                  Ncv32u numRects,
+                                  Ncv32u color,
+                                  cudaStream_t cuStream)
+{
+    return drawRectsWrapperDevice(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
+}
diff --git a/modules/gpu/src/nvidia/core/NCVAlg.hpp b/modules/gpunvidia/src/cuda/NCVAlg.hpp
similarity index 99%
rename from modules/gpu/src/nvidia/core/NCVAlg.hpp
rename to modules/gpunvidia/src/cuda/NCVAlg.hpp
index 3a0a28220..ad14d749f 100644
--- a/modules/gpu/src/nvidia/core/NCVAlg.hpp
+++ b/modules/gpunvidia/src/cuda/NCVAlg.hpp
@@ -43,7 +43,7 @@
 #ifndef _ncv_alg_hpp_
 #define _ncv_alg_hpp_
 
-#include "NCV.hpp"
+#include "opencv2/gpunvidia/NCV.hpp"
 
 
 template <class T>
diff --git a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu b/modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
similarity index 99%
rename from modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu
rename to modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
index f865f957f..4faba6331 100644
--- a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu
+++ b/modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
@@ -57,16 +57,15 @@
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-#if !defined CUDA_DISABLER
-
 #include <iostream>
 #include <vector>
 #include <memory>
 
-#include "NPP_staging/NPP_staging.hpp"
-#include "NCVBroxOpticalFlow.hpp"
 #include "opencv2/core/cuda/utility.hpp"
 
+#include "opencv2/gpunvidia/NPP_staging.hpp"
+#include "opencv2/gpunvidia/NCVBroxOpticalFlow.hpp"
+
 
 typedef NCVVectorAlloc<Ncv32f> FloatVector;
 
@@ -1163,5 +1162,3 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
 
     return NCV_SUCCESS;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/nvidia/core/NCVColorConversion.hpp b/modules/gpunvidia/src/cuda/NCVColorConversion.hpp
similarity index 100%
rename from modules/gpu/src/nvidia/core/NCVColorConversion.hpp
rename to modules/gpunvidia/src/cuda/NCVColorConversion.hpp
diff --git a/modules/gpu/src/nvidia/NCVHaarObjectDetection.cu b/modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu
similarity index 92%
rename from modules/gpu/src/nvidia/NCVHaarObjectDetection.cu
rename to modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu
index f7a913bc6..9ab0194a4 100644
--- a/modules/gpu/src/nvidia/NCVHaarObjectDetection.cu
+++ b/modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu
@@ -56,18 +56,19 @@
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-#if !defined CUDA_DISABLER
-
 #include <algorithm>
 #include <cstdio>
 
-#include "NCV.hpp"
-#include "NCVAlg.hpp"
-#include "NPP_staging/NPP_staging.hpp"
-#include "NCVRuntimeTemplates.hpp"
-#include "NCVHaarObjectDetection.hpp"
 #include "opencv2/core/cuda/warp.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
+#include "opencv2/objdetect.hpp"
+
+#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpunvidia/NPP_staging.hpp"
+#include "opencv2/gpunvidia/NCVHaarObjectDetection.hpp"
+
+#include "NCVRuntimeTemplates.hpp"
+#include "NCVAlg.hpp"
 
 
 //==============================================================================
@@ -2099,12 +2100,201 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
     return ncvStat;
 }
 
-
-NCVStatus loadFromXML(const cv::String &filename,
+static NCVStatus loadFromXML(const cv::String &filename, // loads a CvHaarClassifierCascade via cvLoad and converts it into the NCV stage/node/feature arrays
                       HaarClassifierCascadeDescriptor &haar,
                       std::vector<HaarStage64> &haarStages,
                       std::vector<HaarClassifierNode128> &haarClassifierNodes,
-                      std::vector<HaarFeature64> &haarFeatures);
+                      std::vector<HaarFeature64> &haarFeatures)
+{
+    NCVStatus ncvStat;
+
+    haar.NumStages = 0;
+    haar.NumClassifierRootNodes = 0;
+    haar.NumClassifierTotalNodes = 0;
+    haar.NumFeatures = 0;
+    haar.ClassifierSize.width = 0;
+    haar.ClassifierSize.height = 0;
+    haar.bHasStumpsOnly = true;
+    haar.bNeedsTiltedII = false;
+    Ncv32u curMaxTreeDepth; // NOTE(review): only written below (root: = 1, other nodes: ++), never read in this function
+
+    std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes; // non-root nodes are collected here and appended after all roots at the end
+    haarStages.resize(0);
+    haarClassifierNodes.resize(0);
+    haarFeatures.resize(0);
+
+    cv::Ptr<CvHaarClassifierCascade> oldCascade = (CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0);
+    if (oldCascade.empty())
+    {
+        return NCV_HAAR_XML_LOADING_EXCEPTION;
+    }
+
+    haar.ClassifierSize.width = oldCascade->orig_window_size.width;
+    haar.ClassifierSize.height = oldCascade->orig_window_size.height;
+
+    int stagesCound = oldCascade->count; // number of stages ("Cound" is a typo for "Count")
+    for(int s = 0; s < stagesCound; ++s) // by stages
+    {
+        HaarStage64 curStage;
+        curStage.setStartClassifierRootNodeOffset(static_cast<Ncv32u>(haarClassifierNodes.size())); // this stage's roots start at the current end of the root array
+
+        curStage.setStageThreshold(oldCascade->stage_classifier[s].threshold);
+
+        int treesCount = oldCascade->stage_classifier[s].count;
+        for(int t = 0; t < treesCount; ++t) // by trees
+        {
+            Ncv32u nodeId = 0;
+            CvHaarClassifier* tree = &oldCascade->stage_classifier[s].classifier[t];
+
+            int nodesCount = tree->count;
+            for(int n = 0; n < nodesCount; ++n)  //by features
+            {
+                CvHaarFeature* feature = &tree->haar_feature[n];
+
+                HaarClassifierNode128 curNode;
+                curNode.setThreshold(tree->threshold[n]);
+
+                NcvBool bIsLeftNodeLeaf = false;
+                NcvBool bIsRightNodeLeaf = false;
+
+                HaarClassifierNodeDescriptor32 nodeLeft;
+                if ( tree->left[n] <= 0 ) // non-positive child index: the child is a leaf whose value is alpha[-index]
+                {
+                    Ncv32f leftVal = tree->alpha[-tree->left[n]];
+                    ncvStat = nodeLeft.create(leftVal);
+                    ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
+                    bIsLeftNodeLeaf = true;
+                }
+                else
+                {
+                    Ncv32u leftNodeOffset = tree->left[n];
+                    nodeLeft.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + leftNodeOffset - 1)); // index into the non-root array; rebased by offsetRoot in the merge step below
+                    haar.bHasStumpsOnly = false;
+                }
+                curNode.setLeftNodeDesc(nodeLeft);
+
+                HaarClassifierNodeDescriptor32 nodeRight;
+                if ( tree->right[n] <= 0 ) // same leaf convention as for the left child
+                {
+                    Ncv32f rightVal = tree->alpha[-tree->right[n]];
+                    ncvStat = nodeRight.create(rightVal);
+                    ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
+                    bIsRightNodeLeaf = true;
+                }
+                else
+                {
+                    Ncv32u rightNodeOffset = tree->right[n];
+                    nodeRight.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + rightNodeOffset - 1)); // index into the non-root array; rebased by offsetRoot in the merge step below
+                    haar.bHasStumpsOnly = false;
+                }
+                curNode.setRightNodeDesc(nodeRight);
+
+                Ncv32u tiltedVal = feature->tilted;
+                haar.bNeedsTiltedII = (tiltedVal != 0); // NOTE(review): overwritten for every node, so the cascade-wide flag ends up reflecting only the last node - confirm intent
+
+                Ncv32u featureId = 0;
+                for(int l = 0; l < CV_HAAR_FEATURE_MAX; ++l) //by rects
+                {
+                    Ncv32u rectX = feature->rect[l].r.x;
+                    Ncv32u rectY = feature->rect[l].r.y;
+                    Ncv32u rectWidth = feature->rect[l].r.width;
+                    Ncv32u rectHeight = feature->rect[l].r.height;
+
+                    Ncv32f rectWeight = feature->rect[l].weight;
+
+                    if (rectWeight == 0/* && rectX == 0 &&rectY == 0 && rectWidth == 0 && rectHeight == 0*/)
+                        break; // a zero weight is treated as the end of this feature's used rects
+
+                    HaarFeature64 curFeature;
+                    ncvStat = curFeature.setRect(rectX, rectY, rectWidth, rectHeight, haar.ClassifierSize.width, haar.ClassifierSize.height);
+                    curFeature.setWeight(rectWeight);
+                    ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
+                    haarFeatures.push_back(curFeature);
+
+                    featureId++;
+                }
+
+                HaarFeatureDescriptor32 tmpFeatureDesc;
+                ncvStat = tmpFeatureDesc.create(haar.bNeedsTiltedII, bIsLeftNodeLeaf, bIsRightNodeLeaf,
+                    featureId, static_cast<Ncv32u>(haarFeatures.size()) - featureId); // values passed: rect count for this node, index of its first rect in haarFeatures
+                ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
+                curNode.setFeatureDesc(tmpFeatureDesc);
+
+                if (!nodeId)
+                {
+                    //root node
+                    haarClassifierNodes.push_back(curNode);
+                    curMaxTreeDepth = 1;
+                }
+                else
+                {
+                    //other node
+                    h_TmpClassifierNotRootNodes.push_back(curNode);
+                    curMaxTreeDepth++;
+                }
+
+                nodeId++;
+            }
+        }
+
+        curStage.setNumClassifierRootNodes(treesCount); // one root node is pushed per (non-empty) tree
+        haarStages.push_back(curStage);
+    }
+
+    //fill in cascade stats
+    haar.NumStages = static_cast<Ncv32u>(haarStages.size());
+    haar.NumClassifierRootNodes = static_cast<Ncv32u>(haarClassifierNodes.size());
+    haar.NumClassifierTotalNodes = static_cast<Ncv32u>(haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size());
+    haar.NumFeatures = static_cast<Ncv32u>(haarFeatures.size());
+
+    //merge root and leaf nodes in one classifiers array
+    Ncv32u offsetRoot = static_cast<Ncv32u>(haarClassifierNodes.size()); // non-root nodes are appended after all roots, so their indices shift by the root count
+    for (Ncv32u i=0; i<haarClassifierNodes.size(); i++)
+    {
+        HaarFeatureDescriptor32 featureDesc = haarClassifierNodes[i].getFeatureDesc();
+
+        HaarClassifierNodeDescriptor32 nodeLeft = haarClassifierNodes[i].getLeftNodeDesc();
+        if (!featureDesc.isLeftNodeLeaf())
+        {
+            Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
+            nodeLeft.create(newOffset);
+        }
+        haarClassifierNodes[i].setLeftNodeDesc(nodeLeft);
+
+        HaarClassifierNodeDescriptor32 nodeRight = haarClassifierNodes[i].getRightNodeDesc();
+        if (!featureDesc.isRightNodeLeaf())
+        {
+            Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
+            nodeRight.create(newOffset);
+        }
+        haarClassifierNodes[i].setRightNodeDesc(nodeRight);
+    }
+
+    for (Ncv32u i=0; i<h_TmpClassifierNotRootNodes.size(); i++) // same rebasing for the non-root nodes, which are then appended to the merged array
+    {
+        HaarFeatureDescriptor32 featureDesc = h_TmpClassifierNotRootNodes[i].getFeatureDesc();
+
+        HaarClassifierNodeDescriptor32 nodeLeft = h_TmpClassifierNotRootNodes[i].getLeftNodeDesc();
+        if (!featureDesc.isLeftNodeLeaf())
+        {
+            Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
+            nodeLeft.create(newOffset);
+        }
+        h_TmpClassifierNotRootNodes[i].setLeftNodeDesc(nodeLeft);
+
+        HaarClassifierNodeDescriptor32 nodeRight = h_TmpClassifierNotRootNodes[i].getRightNodeDesc();
+        if (!featureDesc.isRightNodeLeaf())
+        {
+            Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
+            nodeRight.create(newOffset);
+        }
+        h_TmpClassifierNotRootNodes[i].setRightNodeDesc(nodeRight);
+
+        haarClassifierNodes.push_back(h_TmpClassifierNotRootNodes[i]);
+    }
+
+    return NCV_SUCCESS;
+}
 
 
 #define NVBIN_HAAR_SIZERESERVED     16
@@ -2334,5 +2524,3 @@ NCVStatus ncvHaarStoreNVBIN_host(const cv::String &filename,
     fclose(fp);
     return NCV_SUCCESS;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp b/modules/gpunvidia/src/cuda/NCVPixelOperations.hpp
similarity index 99%
rename from modules/gpu/src/nvidia/core/NCVPixelOperations.hpp
rename to modules/gpunvidia/src/cuda/NCVPixelOperations.hpp
index ec2f16ebb..2acdfb682 100644
--- a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp
+++ b/modules/gpunvidia/src/cuda/NCVPixelOperations.hpp
@@ -45,7 +45,7 @@
 
 #include <limits.h>
 #include <float.h>
-#include "NCV.hpp"
+#include "opencv2/gpunvidia/NCV.hpp"
 
 template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
 template<> static inline __host__ __device__ Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
diff --git a/modules/gpu/src/nvidia/core/NCVPyramid.cu b/modules/gpunvidia/src/cuda/NCVPyramid.cu
similarity index 99%
rename from modules/gpu/src/nvidia/core/NCVPyramid.cu
rename to modules/gpunvidia/src/cuda/NCVPyramid.cu
index 380916cce..6b76c644b 100644
--- a/modules/gpu/src/nvidia/core/NCVPyramid.cu
+++ b/modules/gpunvidia/src/cuda/NCVPyramid.cu
@@ -40,16 +40,17 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include <cuda_runtime.h>
 #include <stdio.h>
-#include "NCV.hpp"
-#include "NCVAlg.hpp"
-#include "NCVPyramid.hpp"
-#include "NCVPixelOperations.hpp"
+#include <cuda_runtime.h>
+
 #include "opencv2/core/cuda/common.hpp"
 
+#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpunvidia/NCVPyramid.hpp"
+
+#include "NCVAlg.hpp"
+#include "NCVPixelOperations.hpp"
+
 template<typename T, Ncv32u CN> struct __average4_CN {static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11);};
 
 template<typename T> struct __average4_CN<T, 1> {
@@ -602,5 +603,3 @@ template class NCVImagePyramid<float3>;
 template class NCVImagePyramid<float4>;
 
 #endif //_WIN32
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/nvidia/core/NCVRuntimeTemplates.hpp b/modules/gpunvidia/src/cuda/NCVRuntimeTemplates.hpp
similarity index 100%
rename from modules/gpu/src/nvidia/core/NCVRuntimeTemplates.hpp
rename to modules/gpunvidia/src/cuda/NCVRuntimeTemplates.hpp
diff --git a/modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu b/modules/gpunvidia/src/cuda/NPP_staging.cu
similarity index 99%
rename from modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu
rename to modules/gpunvidia/src/cuda/NPP_staging.cu
index 6d0f9c5c8..31f7adc1d 100644
--- a/modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu
+++ b/modules/gpunvidia/src/cuda/NPP_staging.cu
@@ -44,10 +44,12 @@
 
 #include <vector>
 #include <cuda_runtime.h>
-#include "NPP_staging.hpp"
+
 #include "opencv2/core/cuda/warp.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
 
+#include "opencv2/gpunvidia/NPP_staging.hpp"
+
 
 texture<Ncv8u,  1, cudaReadModeElementType> tex8u;
 texture<Ncv32u, 1, cudaReadModeElementType> tex32u;
diff --git a/modules/gpunvidia/src/precomp.cpp b/modules/gpunvidia/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpunvidia/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpunvidia/src/precomp.hpp b/modules/gpunvidia/src/precomp.hpp
new file mode 100644
index 000000000..613670009
--- /dev/null
+++ b/modules/gpunvidia/src/precomp.hpp
@@ -0,0 +1,56 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <limits>
+#include <iostream>
+#include <algorithm>
+
+#include "opencv2/gpunvidia.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/objdetect.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/test/nvidia/NCVAutoTestLister.hpp b/modules/gpunvidia/test/NCVAutoTestLister.hpp
similarity index 97%
rename from modules/gpu/test/nvidia/NCVAutoTestLister.hpp
rename to modules/gpunvidia/test/NCVAutoTestLister.hpp
index 6ac5bc0cf..8730eeea7 100644
--- a/modules/gpu/test/nvidia/NCVAutoTestLister.hpp
+++ b/modules/gpunvidia/test/NCVAutoTestLister.hpp
@@ -46,13 +46,7 @@
 #include <vector>
 
 #include "NCVTest.hpp"
-#include <main_test_nvidia.h>
-//enum OutputLevel
-//{
-//    OutputLevelNone,
-//    OutputLevelCompact,
-//    OutputLevelFull
-//};
+#include "main_test_nvidia.h"
 
 class NCVAutoTestLister
 {
diff --git a/modules/gpu/test/nvidia/NCVTest.hpp b/modules/gpunvidia/test/NCVTest.hpp
similarity index 99%
rename from modules/gpu/test/nvidia/NCVTest.hpp
rename to modules/gpunvidia/test/NCVTest.hpp
index 22958e565..d08044db0 100644
--- a/modules/gpu/test/nvidia/NCVTest.hpp
+++ b/modules/gpunvidia/test/NCVTest.hpp
@@ -55,7 +55,8 @@
 #include <fstream>
 
 #include <cuda_runtime.h>
-#include "NPP_staging.hpp"
+
+#include "opencv2/gpunvidia.hpp"
 
 
 struct NCVTestReport
diff --git a/modules/gpu/test/nvidia/NCVTestSourceProvider.hpp b/modules/gpunvidia/test/NCVTestSourceProvider.hpp
similarity index 99%
rename from modules/gpu/test/nvidia/NCVTestSourceProvider.hpp
rename to modules/gpunvidia/test/NCVTestSourceProvider.hpp
index df245bb82..38b9d814c 100644
--- a/modules/gpu/test/nvidia/NCVTestSourceProvider.hpp
+++ b/modules/gpunvidia/test/NCVTestSourceProvider.hpp
@@ -45,8 +45,8 @@
 
 #include <memory>
 
-#include "NCV.hpp"
-#include <opencv2/highgui.hpp>
+#include "opencv2/highgui.hpp"
+#include "opencv2/gpunvidia.hpp"
 
 
 template <class T>
diff --git a/modules/gpu/test/nvidia/TestCompact.cpp b/modules/gpunvidia/test/TestCompact.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestCompact.cpp
rename to modules/gpunvidia/test/TestCompact.cpp
index 915410107..70640f37d 100644
--- a/modules/gpu/test/nvidia/TestCompact.cpp
+++ b/modules/gpunvidia/test/TestCompact.cpp
@@ -40,10 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include "TestCompact.h"
-
+#include "test_precomp.hpp"
 
 TestCompact::TestCompact(std::string testName_, NCVTestSourceProvider<Ncv32u> &src_,
                                              Ncv32u length_, Ncv32u badElem_, Ncv32u badElemPercentage_)
@@ -160,5 +157,3 @@ bool TestCompact::deinit()
 {
     return true;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestCompact.h b/modules/gpunvidia/test/TestCompact.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestCompact.h
rename to modules/gpunvidia/test/TestCompact.h
diff --git a/modules/gpu/test/nvidia/TestDrawRects.cpp b/modules/gpunvidia/test/TestDrawRects.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestDrawRects.cpp
rename to modules/gpunvidia/test/TestDrawRects.cpp
index 3da458694..40d8e21b4 100644
--- a/modules/gpu/test/nvidia/TestDrawRects.cpp
+++ b/modules/gpunvidia/test/TestDrawRects.cpp
@@ -40,10 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include "TestDrawRects.h"
-#include "NCVHaarObjectDetection.hpp"
+#include "test_precomp.hpp"
 
 
 template <class T>
@@ -195,5 +192,3 @@ bool TestDrawRects<T>::deinit()
 
 template class TestDrawRects<Ncv8u>;
 template class TestDrawRects<Ncv32u>;
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestDrawRects.h b/modules/gpunvidia/test/TestDrawRects.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestDrawRects.h
rename to modules/gpunvidia/test/TestDrawRects.h
diff --git a/modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp b/modules/gpunvidia/test/TestHaarCascadeApplication.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp
rename to modules/gpunvidia/test/TestHaarCascadeApplication.cpp
index 01a9637b8..121f31e43 100644
--- a/modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp
+++ b/modules/gpunvidia/test/TestHaarCascadeApplication.cpp
@@ -40,13 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include <float.h>
-
-#if defined(__GNUC__) && !defined(__APPLE__) && !defined(__arm__)
-    #include <fpu_control.h>
-#endif
+#include "test_precomp.hpp"
 
 namespace
 {
@@ -88,10 +82,6 @@ namespace
     }
 }
 
-#include "TestHaarCascadeApplication.h"
-#include "NCVHaarObjectDetection.hpp"
-
-
 TestHaarCascadeApplication::TestHaarCascadeApplication(std::string testName_, NCVTestSourceProvider<Ncv8u> &src_,
                                                        std::string cascadeName_, Ncv32u width_, Ncv32u height_)
     :
@@ -343,5 +333,3 @@ bool TestHaarCascadeApplication::deinit()
 {
     return true;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestHaarCascadeApplication.h b/modules/gpunvidia/test/TestHaarCascadeApplication.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestHaarCascadeApplication.h
rename to modules/gpunvidia/test/TestHaarCascadeApplication.h
diff --git a/modules/gpu/test/nvidia/TestHaarCascadeLoader.cpp b/modules/gpunvidia/test/TestHaarCascadeLoader.cpp
similarity index 97%
rename from modules/gpu/test/nvidia/TestHaarCascadeLoader.cpp
rename to modules/gpunvidia/test/TestHaarCascadeLoader.cpp
index 42552295d..b1e840a54 100644
--- a/modules/gpu/test/nvidia/TestHaarCascadeLoader.cpp
+++ b/modules/gpunvidia/test/TestHaarCascadeLoader.cpp
@@ -40,10 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include "TestHaarCascadeLoader.h"
-#include "NCVHaarObjectDetection.hpp"
+#include "test_precomp.hpp"
 
 
 TestHaarCascadeLoader::TestHaarCascadeLoader(std::string testName_, std::string cascadeName_)
@@ -154,5 +151,3 @@ bool TestHaarCascadeLoader::deinit()
 {
     return true;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestHaarCascadeLoader.h b/modules/gpunvidia/test/TestHaarCascadeLoader.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestHaarCascadeLoader.h
rename to modules/gpunvidia/test/TestHaarCascadeLoader.h
diff --git a/modules/gpu/test/nvidia/TestHypothesesFilter.cpp b/modules/gpunvidia/test/TestHypothesesFilter.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestHypothesesFilter.cpp
rename to modules/gpunvidia/test/TestHypothesesFilter.cpp
index f71a5f72d..39d655661 100644
--- a/modules/gpu/test/nvidia/TestHypothesesFilter.cpp
+++ b/modules/gpunvidia/test/TestHypothesesFilter.cpp
@@ -40,10 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include "TestHypothesesFilter.h"
-#include "NCVHaarObjectDetection.hpp"
+#include "test_precomp.hpp"
 
 
 TestHypothesesFilter::TestHypothesesFilter(std::string testName_, NCVTestSourceProvider<Ncv32u> &src_,
@@ -207,5 +204,3 @@ bool TestHypothesesFilter::deinit()
 {
     return true;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestHypothesesFilter.h b/modules/gpunvidia/test/TestHypothesesFilter.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestHypothesesFilter.h
rename to modules/gpunvidia/test/TestHypothesesFilter.h
diff --git a/modules/gpu/test/nvidia/TestHypothesesGrow.cpp b/modules/gpunvidia/test/TestHypothesesGrow.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestHypothesesGrow.cpp
rename to modules/gpunvidia/test/TestHypothesesGrow.cpp
index 5cb81ddf4..e7fe4d939 100644
--- a/modules/gpu/test/nvidia/TestHypothesesGrow.cpp
+++ b/modules/gpunvidia/test/TestHypothesesGrow.cpp
@@ -40,10 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include "TestHypothesesGrow.h"
-#include "NCVHaarObjectDetection.hpp"
+#include "test_precomp.hpp"
 
 
 TestHypothesesGrow::TestHypothesesGrow(std::string testName_, NCVTestSourceProvider<Ncv32u> &src_,
@@ -165,5 +162,3 @@ bool TestHypothesesGrow::deinit()
 {
     return true;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestHypothesesGrow.h b/modules/gpunvidia/test/TestHypothesesGrow.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestHypothesesGrow.h
rename to modules/gpunvidia/test/TestHypothesesGrow.h
diff --git a/modules/gpu/test/nvidia/TestIntegralImage.cpp b/modules/gpunvidia/test/TestIntegralImage.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestIntegralImage.cpp
rename to modules/gpunvidia/test/TestIntegralImage.cpp
index a0820a821..c04edff7c 100644
--- a/modules/gpu/test/nvidia/TestIntegralImage.cpp
+++ b/modules/gpunvidia/test/TestIntegralImage.cpp
@@ -40,10 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include <math.h>
-#include "TestIntegralImage.h"
+#include "test_precomp.hpp"
 
 
 template <class T_in, class T_out>
@@ -216,5 +213,3 @@ bool TestIntegralImage<T_in, T_out>::deinit()
 
 template class TestIntegralImage<Ncv8u, Ncv32u>;
 template class TestIntegralImage<Ncv32f, Ncv32f>;
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestIntegralImage.h b/modules/gpunvidia/test/TestIntegralImage.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestIntegralImage.h
rename to modules/gpunvidia/test/TestIntegralImage.h
diff --git a/modules/gpu/test/nvidia/TestIntegralImageSquared.cpp b/modules/gpunvidia/test/TestIntegralImageSquared.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestIntegralImageSquared.cpp
rename to modules/gpunvidia/test/TestIntegralImageSquared.cpp
index 4f4e45d40..5481fa2e3 100644
--- a/modules/gpu/test/nvidia/TestIntegralImageSquared.cpp
+++ b/modules/gpunvidia/test/TestIntegralImageSquared.cpp
@@ -40,9 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include "TestIntegralImageSquared.h"
+#include "test_precomp.hpp"
 
 
 TestIntegralImageSquared::TestIntegralImageSquared(std::string testName_, NCVTestSourceProvider<Ncv8u> &src_,
@@ -148,5 +146,3 @@ bool TestIntegralImageSquared::deinit()
 {
     return true;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestIntegralImageSquared.h b/modules/gpunvidia/test/TestIntegralImageSquared.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestIntegralImageSquared.h
rename to modules/gpunvidia/test/TestIntegralImageSquared.h
diff --git a/modules/gpu/test/nvidia/TestRectStdDev.cpp b/modules/gpunvidia/test/TestRectStdDev.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestRectStdDev.cpp
rename to modules/gpunvidia/test/TestRectStdDev.cpp
index c019b0ee3..86bb9ed23 100644
--- a/modules/gpu/test/nvidia/TestRectStdDev.cpp
+++ b/modules/gpunvidia/test/TestRectStdDev.cpp
@@ -40,11 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include <math.h>
-
-#include "TestRectStdDev.h"
+#include "test_precomp.hpp"
 
 
 TestRectStdDev::TestRectStdDev(std::string testName_, NCVTestSourceProvider<Ncv8u> &src_,
@@ -211,5 +207,3 @@ bool TestRectStdDev::deinit()
 {
     return true;
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestRectStdDev.h b/modules/gpunvidia/test/TestRectStdDev.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestRectStdDev.h
rename to modules/gpunvidia/test/TestRectStdDev.h
diff --git a/modules/gpu/test/nvidia/TestResize.cpp b/modules/gpunvidia/test/TestResize.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestResize.cpp
rename to modules/gpunvidia/test/TestResize.cpp
index 83443c88b..d2080f06d 100644
--- a/modules/gpu/test/nvidia/TestResize.cpp
+++ b/modules/gpunvidia/test/TestResize.cpp
@@ -40,11 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include <math.h>
-
-#include "TestResize.h"
+#include "test_precomp.hpp"
 
 
 template <class T>
@@ -192,5 +188,3 @@ bool TestResize<T>::deinit()
 
 template class TestResize<Ncv32u>;
 template class TestResize<Ncv64u>;
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestResize.h b/modules/gpunvidia/test/TestResize.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestResize.h
rename to modules/gpunvidia/test/TestResize.h
diff --git a/modules/gpu/test/nvidia/TestTranspose.cpp b/modules/gpunvidia/test/TestTranspose.cpp
similarity index 98%
rename from modules/gpu/test/nvidia/TestTranspose.cpp
rename to modules/gpunvidia/test/TestTranspose.cpp
index 5f71da4f8..3322a0758 100644
--- a/modules/gpu/test/nvidia/TestTranspose.cpp
+++ b/modules/gpunvidia/test/TestTranspose.cpp
@@ -40,11 +40,7 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
-#include <math.h>
-
-#include "TestTranspose.h"
+#include "test_precomp.hpp"
 
 
 template <class T>
@@ -179,5 +175,3 @@ bool TestTranspose<T>::deinit()
 
 template class TestTranspose<Ncv32u>;
 template class TestTranspose<Ncv64u>;
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/nvidia/TestTranspose.h b/modules/gpunvidia/test/TestTranspose.h
similarity index 100%
rename from modules/gpu/test/nvidia/TestTranspose.h
rename to modules/gpunvidia/test/TestTranspose.h
diff --git a/modules/gpu/test/nvidia/main_nvidia.cpp b/modules/gpunvidia/test/main_nvidia.cpp
similarity index 96%
rename from modules/gpu/test/nvidia/main_nvidia.cpp
rename to modules/gpunvidia/test/main_nvidia.cpp
index 2e36fbec9..1179b5b96 100644
--- a/modules/gpu/test/nvidia/main_nvidia.cpp
+++ b/modules/gpunvidia/test/main_nvidia.cpp
@@ -40,35 +40,12 @@
 //
 //M*/
 
+#include "test_precomp.hpp"
+
 #if defined _MSC_VER && _MSC_VER >= 1200
 # pragma warning (disable : 4408 4201 4100)
 #endif
 
-#if !defined CUDA_DISABLER
-
-#include <cstdio>
-
-#include "NCV.hpp"
-#include "NCVHaarObjectDetection.hpp"
-
-#include "TestIntegralImage.h"
-#include "TestIntegralImageSquared.h"
-#include "TestRectStdDev.h"
-#include "TestResize.h"
-#include "TestCompact.h"
-#include "TestTranspose.h"
-
-#include "TestDrawRects.h"
-#include "TestHypothesesGrow.h"
-#include "TestHypothesesFilter.h"
-#include "TestHaarCascadeLoader.h"
-#include "TestHaarCascadeApplication.h"
-
-#include "NCVAutoTestLister.hpp"
-#include "NCVTestSourceProvider.hpp"
-
-#include "main_test_nvidia.h"
-
 static std::string path;
 
 namespace {
@@ -480,5 +457,3 @@ bool nvidia_NCV_Visualization(const std::string& test_data_path, OutputLevel out
 
     return testListerVisualize.invoke();
 }
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/test/main_test_nvidia.h b/modules/gpunvidia/test/main_test_nvidia.h
similarity index 100%
rename from modules/gpu/test/main_test_nvidia.h
rename to modules/gpunvidia/test/main_test_nvidia.h
diff --git a/modules/gpu/test/main.cpp b/modules/gpunvidia/test/test_main.cpp
similarity index 100%
rename from modules/gpu/test/main.cpp
rename to modules/gpunvidia/test/test_main.cpp
diff --git a/modules/gpu/test/test_nvidia.cpp b/modules/gpunvidia/test/test_nvidia.cpp
similarity index 100%
rename from modules/gpu/test/test_nvidia.cpp
rename to modules/gpunvidia/test/test_nvidia.cpp
diff --git a/modules/gpunvidia/test/test_precomp.cpp b/modules/gpunvidia/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpunvidia/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpunvidia/test/test_precomp.hpp b/modules/gpunvidia/test/test_precomp.hpp
new file mode 100644
index 000000000..46acfc2ec
--- /dev/null
+++ b/modules/gpunvidia/test/test_precomp.hpp
@@ -0,0 +1,95 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#if defined(__GNUC__) && !defined(__APPLE__) && !defined(__arm__)
+    #include <fpu_control.h>
+#endif
+
+#include <cfloat>
+#include <cstdio>
+#include <cmath>
+#include <vector>
+#include <string>
+#include <map>
+#include <memory>
+#include <algorithm>
+#include <fstream>
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/core/gpumat.hpp"
+#include "opencv2/gpunvidia.hpp"
+#include "opencv2/highgui.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#include "NCVTest.hpp"
+#include "NCVAutoTestLister.hpp"
+#include "NCVTestSourceProvider.hpp"
+
+#include "TestIntegralImage.h"
+#include "TestIntegralImageSquared.h"
+#include "TestRectStdDev.h"
+#include "TestResize.h"
+#include "TestCompact.h"
+#include "TestTranspose.h"
+#include "TestDrawRects.h"
+#include "TestHypothesesGrow.h"
+#include "TestHypothesesFilter.h"
+#include "TestHaarCascadeLoader.h"
+#include "TestHaarCascadeApplication.h"
+
+#include "main_test_nvidia.h"
+
+#endif
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 760bc26e6..3b0555366 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters)
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 
diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp
index 694825424..a4bc6a973 100644
--- a/samples/gpu/cascadeclassifier_nvidia_api.cpp
+++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp
@@ -11,7 +11,7 @@
 #include "opencv2/objdetect/objdetect_c.h"
 
 #ifdef HAVE_CUDA
-#include "NCVHaarObjectDetection.hpp"
+#include "opencv2/gpunvidia.hpp"
 #endif
 
 using namespace std;
diff --git a/samples/gpu/opticalflow_nvidia_api.cpp b/samples/gpu/opticalflow_nvidia_api.cpp
index 10663ce45..e4fc93cd5 100644
--- a/samples/gpu/opticalflow_nvidia_api.cpp
+++ b/samples/gpu/opticalflow_nvidia_api.cpp
@@ -16,8 +16,7 @@
 #include "opencv2/highgui/highgui_c.h"
 
 #ifdef HAVE_CUDA
-#include "NPP_staging/NPP_staging.hpp"
-#include "NCVBroxOpticalFlow.hpp"
+#include "opencv2/gpunvidia.hpp"
 #endif
 
 #if !defined(HAVE_CUDA)

From c2402053b949f1f5a1957c13363bf996d6c5ccd6 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 9 Apr 2013 12:10:56 +0400
Subject: [PATCH 06/49] removed columnSum function (it is a duplicate of
 reduce)

---
 modules/gpu/include/opencv2/gpu.hpp         |  3 --
 modules/gpu/perf/perf_imgproc.cpp           | 26 -----------
 modules/gpu/src/cuda/imgproc.cu             | 35 ---------------
 modules/gpu/src/imgproc.cpp                 | 22 +---------
 modules/gpu/test/test_imgproc.cpp           | 48 ---------------------
 modules/gpuarithm/src/matrix_reductions.cpp |  1 -
 6 files changed, 1 insertion(+), 134 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 8f837da08..0c625de8a 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -184,9 +184,6 @@ CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer,
 //! supports source images of 8UC1 type only
 CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());
 
-//! computes vertical sum, supports only CV_32FC1 images
-CV_EXPORTS void columnSum(const GpuMat& src, GpuMat& sum);
-
 //! computes the standard deviation of integral images
 //! supports only CV_32SC1 source type and CV_32FC1 sqr type
 //! output will have CV_32FC1 type
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp
index 0a24d24d3..9a1168a52 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -631,32 +631,6 @@ PERF_TEST_P(Sz_ClipLimit, ImgProc_CLAHE,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// ColumnSum
-
-PERF_TEST_P(Sz, ImgProc_ColumnSum,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_32FC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::columnSum(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // Canny
 
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu
index 71f5e872c..f209ab680 100644
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -582,41 +582,6 @@ namespace cv { namespace gpu { namespace cudev
                 cudaSafeCall(cudaDeviceSynchronize());
         }
 
-        ////////////////////////////// Column Sum //////////////////////////////////////
-
-        __global__ void column_sumKernel_32F(int cols, int rows, const PtrStepb src, const PtrStepb dst)
-        {
-            int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-            if (x < cols)
-            {
-                const unsigned char* src_data = src.data + x * sizeof(float);
-                unsigned char* dst_data = dst.data + x * sizeof(float);
-
-                float sum = 0.f;
-                for (int y = 0; y < rows; ++y)
-                {
-                    sum += *(const float*)src_data;
-                    *(float*)dst_data = sum;
-                    src_data += src.step;
-                    dst_data += dst.step;
-                }
-            }
-        }
-
-
-        void columnSum_32F(const PtrStepSzb src, const PtrStepSzb dst)
-        {
-            dim3 threads(256);
-            dim3 grid(divUp(src.cols, threads.x));
-
-            column_sumKernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-
         //////////////////////////////////////////////////////////////////////////
         // mulSpectrums
 
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 96a62b86b..fa0ed03a7 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -59,7 +59,6 @@ void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int,
 void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_no_cuda(); }
 void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_no_cuda(); }
 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
@@ -630,26 +629,7 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
 }
 
 //////////////////////////////////////////////////////////////////////////////
-// columnSum
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void columnSum_32F(const PtrStepSzb src, const PtrStepSzb dst);
-    }
-}}}
-
-void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
-{
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    CV_Assert(src.type() == CV_32F);
-
-    dst.create(src.size(), CV_32F);
-
-    cudev::imgproc::columnSum_32F(src, dst);
-}
+// rectStdDev
 
 void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
 {
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
index 740a8d930..a38f27b74 100644
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -261,54 +261,6 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CLAHE, testing::Combine(
     DIFFERENT_SIZES,
     testing::Values(0.0, 40.0)));
 
-////////////////////////////////////////////////////////////////////////
-// ColumnSum
-
-PARAM_TEST_CASE(ColumnSum, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(ColumnSum, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_32FC1);
-
-    cv::gpu::GpuMat d_dst;
-    cv::gpu::columnSum(loadMat(src), d_dst);
-
-    cv::Mat dst(d_dst);
-
-    for (int j = 0; j < src.cols; ++j)
-    {
-        float gold = src.at<float>(0, j);
-        float res = dst.at<float>(0, j);
-        ASSERT_NEAR(res, gold, 1e-5);
-    }
-
-    for (int i = 1; i < src.rows; ++i)
-    {
-        for (int j = 0; j < src.cols; ++j)
-        {
-            float gold = src.at<float>(i, j) += src.at<float>(i - 1, j);
-            float res = dst.at<float>(i, j);
-            ASSERT_NEAR(res, gold, 1e-5);
-        }
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES));
-
 ////////////////////////////////////////////////////////
 // Canny
 
diff --git a/modules/gpuarithm/src/matrix_reductions.cpp b/modules/gpuarithm/src/matrix_reductions.cpp
index b56cb20fe..027618dac 100644
--- a/modules/gpuarithm/src/matrix_reductions.cpp
+++ b/modules/gpuarithm/src/matrix_reductions.cpp
@@ -71,7 +71,6 @@ int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_no_cuda(); return 0; }
 void cv::gpu::reduce(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
 
 #else
-#include "opencv2/core/utility.hpp"
 
 namespace
 {

From ed006a0612d1740d4543c3dab18cfb1248065841 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 9 Apr 2013 12:31:46 +0400
Subject: [PATCH 07/49] added private header to gpunvidia module

---
 modules/gpu/src/cuda/internal_shared.hpp      | 21 +---
 modules/gpu/src/cuda/safe_call.hpp            | 11 ---
 modules/gpu/src/error.cpp                     | 58 -----------
 .../include/opencv2/gpunvidia/private.hpp     | 96 +++++++++++++++++++
 modules/gpunvidia/src/NCV.cpp                 | 80 ++++++++++++++++
 modules/gpunvidia/src/precomp.hpp             |  1 +
 6 files changed, 179 insertions(+), 88 deletions(-)
 create mode 100644 modules/gpunvidia/include/opencv2/gpunvidia/private.hpp

diff --git a/modules/gpu/src/cuda/internal_shared.hpp b/modules/gpu/src/cuda/internal_shared.hpp
index c8d4e5b7f..ce2cfe465 100644
--- a/modules/gpu/src/cuda/internal_shared.hpp
+++ b/modules/gpu/src/cuda/internal_shared.hpp
@@ -48,28 +48,11 @@
 
 #include "opencv2/core/cuda_devptrs.hpp"
 #include "opencv2/core/cuda/common.hpp"
-#include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpunvidia/private.hpp"
+
 
 #include "safe_call.hpp"
 
-namespace cv { namespace gpu
-{
-    class NppStStreamHandler
-    {
-    public:
-        inline explicit NppStStreamHandler(cudaStream_t newStream = 0)
-        {
-            oldStream = nppStSetActiveCUDAstream(newStream);
-        }
 
-        inline ~NppStStreamHandler()
-        {
-            nppStSetActiveCUDAstream(oldStream);
-        }
-
-    private:
-        cudaStream_t oldStream;
-    };
-}}
 
 #endif /* __OPENCV_internal_shared_HPP__ */
diff --git a/modules/gpu/src/cuda/safe_call.hpp b/modules/gpu/src/cuda/safe_call.hpp
index fa62fb15a..1d4a437e6 100644
--- a/modules/gpu/src/cuda/safe_call.hpp
+++ b/modules/gpu/src/cuda/safe_call.hpp
@@ -43,30 +43,19 @@
 #ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
 #define __OPENCV_CUDA_SAFE_CALL_HPP__
 
-#include <cuda_runtime_api.h>
 #include <cufft.h>
-#include "opencv2/gpunvidia.hpp"
 
 #if defined(__GNUC__)
-    #define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
     #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
 #else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define ncvSafeCall(expr)  ___ncvSafeCall(expr, __FILE__, __LINE__)
     #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__)
 #endif
 
 namespace cv { namespace gpu
 {
-    void ncvError(int err, const char *file, const int line, const char *func = "");
     void cufftError(int err, const char *file, const int line, const char *func = "");
 }}
 
-static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
-{
-    if (NCV_SUCCESS != err)
-        cv::gpu::ncvError(err, file, line, func);
-}
-
 static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
 {
     if (CUFFT_SUCCESS != err)
diff --git a/modules/gpu/src/error.cpp b/modules/gpu/src/error.cpp
index 36759864b..3b8b6b3ee 100644
--- a/modules/gpu/src/error.cpp
+++ b/modules/gpu/src/error.cpp
@@ -73,58 +73,6 @@ namespace
         return str;
     }
 
-    //////////////////////////////////////////////////////////////////////////
-    // NCV errors
-
-    const ErrorEntry ncv_errors [] =
-    {
-        error_entry( NCV_SUCCESS ),
-        error_entry( NCV_UNKNOWN_ERROR ),
-        error_entry( NCV_CUDA_ERROR ),
-        error_entry( NCV_NPP_ERROR ),
-        error_entry( NCV_FILE_ERROR ),
-        error_entry( NCV_NULL_PTR ),
-        error_entry( NCV_INCONSISTENT_INPUT ),
-        error_entry( NCV_TEXTURE_BIND_ERROR ),
-        error_entry( NCV_DIMENSIONS_INVALID ),
-        error_entry( NCV_INVALID_ROI ),
-        error_entry( NCV_INVALID_STEP ),
-        error_entry( NCV_INVALID_SCALE ),
-        error_entry( NCV_INVALID_SCALE ),
-        error_entry( NCV_ALLOCATOR_NOT_INITIALIZED ),
-        error_entry( NCV_ALLOCATOR_BAD_ALLOC ),
-        error_entry( NCV_ALLOCATOR_BAD_DEALLOC ),
-        error_entry( NCV_ALLOCATOR_INSUFFICIENT_CAPACITY ),
-        error_entry( NCV_ALLOCATOR_DEALLOC_ORDER ),
-        error_entry( NCV_ALLOCATOR_BAD_REUSE ),
-        error_entry( NCV_MEM_COPY_ERROR ),
-        error_entry( NCV_MEM_RESIDENCE_ERROR ),
-        error_entry( NCV_MEM_INSUFFICIENT_CAPACITY ),
-        error_entry( NCV_HAAR_INVALID_PIXEL_STEP ),
-        error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER ),
-        error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE ),
-        error_entry( NCV_HAAR_TOO_LARGE_FEATURES ),
-        error_entry( NCV_HAAR_XML_LOADING_EXCEPTION ),
-        error_entry( NCV_NOIMPL_HAAR_TILTED_FEATURES ),
-        error_entry( NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW ),
-        error_entry( NPPST_SUCCESS ),
-        error_entry( NPPST_ERROR ),
-        error_entry( NPPST_CUDA_KERNEL_EXECUTION_ERROR ),
-        error_entry( NPPST_NULL_POINTER_ERROR ),
-        error_entry( NPPST_TEXTURE_BIND_ERROR ),
-        error_entry( NPPST_MEMCPY_ERROR ),
-        error_entry( NPPST_MEM_ALLOC_ERR ),
-        error_entry( NPPST_MEMFREE_ERR ),
-        error_entry( NPPST_INVALID_ROI ),
-        error_entry( NPPST_INVALID_STEP ),
-        error_entry( NPPST_INVALID_SCALE ),
-        error_entry( NPPST_MEM_INSUFFICIENT_BUFFER ),
-        error_entry( NPPST_MEM_RESIDENCE_ERROR ),
-        error_entry( NPPST_MEM_INTERNAL_ERROR )
-    };
-
-    const size_t ncv_error_num = sizeof(ncv_errors) / sizeof(ncv_errors[0]);
-
     //////////////////////////////////////////////////////////////////////////
     // CUFFT errors
 
@@ -148,12 +96,6 @@ namespace cv
 {
     namespace gpu
     {
-        void ncvError(int code, const char* file, const int line, const char* func)
-        {
-            String msg = getErrorString(code, ncv_errors, ncv_error_num);
-            cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
-        }
-
         void cufftError(int code, const char* file, const int line, const char* func)
         {
             String msg = getErrorString(code, cufft_errors, cufft_error_num);
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp
new file mode 100644
index 000000000..e86aaacf3
--- /dev/null
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp
@@ -0,0 +1,96 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_GPUNVIDIA_PRIVATE_HPP__
+#define __OPENCV_CORE_GPUNVIDIA_PRIVATE_HPP__
+
+#ifndef __OPENCV_BUILD
+#  error this is a private header which should not be used from outside of the OpenCV library
+#endif
+
+#include "opencv2/core/gpu_private.hpp"
+
+#ifndef HAVE_CUDA
+#  error gpunvidia module requires CUDA
+#endif
+
+#include "opencv2/gpunvidia.hpp"
+
+namespace cv { namespace gpu
+{
+    class NppStStreamHandler // scoped switch of the active NPPST CUDA stream
+    {
+    public:
+        inline explicit NppStStreamHandler(cudaStream_t newStream = 0) // makes newStream current and remembers the stream used before
+        {
+            oldStream = nppStSetActiveCUDAstream(newStream);
+        }
+
+        inline ~NppStStreamHandler() // re-activates the stream that was current at construction
+        {
+            nppStSetActiveCUDAstream(oldStream);
+        }
+
+    private:
+        cudaStream_t oldStream;
+    };
+
+    NCV_EXPORTS cv::String getNcvErrorMessage(int code);
+
+    static inline void checkNcvError(int err, const char* file, const int line, const char* func)
+    {
+        if (NCV_SUCCESS != err) // every status other than NCV_SUCCESS is treated as a failure
+        {
+            cv::String msg = getNcvErrorMessage(err); // textual name of the status plus its numeric code
+            cv::error(cv::Error::GpuApiCallError, msg, func, file, line); // report via cv::error with the caller-supplied location
+        }
+    }
+}}
+
+#if defined(__GNUC__) // __GNUC__ defined: the enclosing function name is forwarded through __func__
+    #define ncvSafeCall(expr)  cv::gpu::checkNcvError(expr, __FILE__, __LINE__, __func__)
+#else /* compilers that do not define __GNUC__: an empty function name is passed */
+    #define ncvSafeCall(expr)  cv::gpu::checkNcvError(expr, __FILE__, __LINE__, "")
+#endif // ncvSafeCall
+
+#endif // __OPENCV_CORE_GPUNVIDIA_PRIVATE_HPP__
diff --git a/modules/gpunvidia/src/NCV.cpp b/modules/gpunvidia/src/NCV.cpp
index 979276c7c..bf1531d7f 100644
--- a/modules/gpunvidia/src/NCV.cpp
+++ b/modules/gpunvidia/src/NCV.cpp
@@ -48,6 +48,86 @@
 //
 //==============================================================================
 
+namespace
+{
+    #define error_entry(entry)  { entry, #entry } // ErrorEntry initializer: the value and its stringized identifier
+
+    struct ErrorEntry // one table row: numeric status code and its textual name
+    {
+        int code;
+        const char* str;
+    };
+
+    struct ErrorEntryComparer // unary predicate that matches a table row by its code
+    {
+        int code;
+        ErrorEntryComparer(int code_) : code(code_) {}
+        bool operator()(const ErrorEntry& e) const { return e.code == code; }
+    };
+
+    //////////////////////////////////////////////////////////////////////////
+    // NCV and NPPST status codes paired with their identifier names
+
+    const ErrorEntry ncv_errors [] =
+    {
+        error_entry( NCV_SUCCESS ),
+        error_entry( NCV_UNKNOWN_ERROR ),
+        error_entry( NCV_CUDA_ERROR ),
+        error_entry( NCV_NPP_ERROR ),
+        error_entry( NCV_FILE_ERROR ),
+        error_entry( NCV_NULL_PTR ),
+        error_entry( NCV_INCONSISTENT_INPUT ),
+        error_entry( NCV_TEXTURE_BIND_ERROR ),
+        error_entry( NCV_DIMENSIONS_INVALID ),
+        error_entry( NCV_INVALID_ROI ),
+        error_entry( NCV_INVALID_STEP ),
+        error_entry( NCV_INVALID_SCALE ),
+        error_entry( NCV_INVALID_SCALE ), // NOTE(review): repeats the previous entry; redundant for a first-match lookup
+        error_entry( NCV_ALLOCATOR_NOT_INITIALIZED ),
+        error_entry( NCV_ALLOCATOR_BAD_ALLOC ),
+        error_entry( NCV_ALLOCATOR_BAD_DEALLOC ),
+        error_entry( NCV_ALLOCATOR_INSUFFICIENT_CAPACITY ),
+        error_entry( NCV_ALLOCATOR_DEALLOC_ORDER ),
+        error_entry( NCV_ALLOCATOR_BAD_REUSE ),
+        error_entry( NCV_MEM_COPY_ERROR ),
+        error_entry( NCV_MEM_RESIDENCE_ERROR ),
+        error_entry( NCV_MEM_INSUFFICIENT_CAPACITY ),
+        error_entry( NCV_HAAR_INVALID_PIXEL_STEP ),
+        error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER ),
+        error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE ),
+        error_entry( NCV_HAAR_TOO_LARGE_FEATURES ),
+        error_entry( NCV_HAAR_XML_LOADING_EXCEPTION ),
+        error_entry( NCV_NOIMPL_HAAR_TILTED_FEATURES ),
+        error_entry( NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW ),
+        error_entry( NPPST_SUCCESS ),
+        error_entry( NPPST_ERROR ),
+        error_entry( NPPST_CUDA_KERNEL_EXECUTION_ERROR ),
+        error_entry( NPPST_NULL_POINTER_ERROR ),
+        error_entry( NPPST_TEXTURE_BIND_ERROR ),
+        error_entry( NPPST_MEMCPY_ERROR ),
+        error_entry( NPPST_MEM_ALLOC_ERR ),
+        error_entry( NPPST_MEMFREE_ERR ),
+        error_entry( NPPST_INVALID_ROI ),
+        error_entry( NPPST_INVALID_STEP ),
+        error_entry( NPPST_INVALID_SCALE ),
+        error_entry( NPPST_MEM_INSUFFICIENT_BUFFER ),
+        error_entry( NPPST_MEM_RESIDENCE_ERROR ),
+        error_entry( NPPST_MEM_INTERNAL_ERROR )
+    };
+
+    const size_t ncv_error_num = sizeof(ncv_errors) / sizeof(ncv_errors[0]); // number of rows in ncv_errors
+} // anonymous namespace
+
+cv::String cv::gpu::getNcvErrorMessage(int code) // formats a status code as "<name> [Code = <code>]"
+{
+    size_t idx = std::find_if(ncv_errors, ncv_errors + ncv_error_num, ErrorEntryComparer(code)) - ncv_errors; // index of the first row with this code; equals ncv_error_num when none matches
+
+    const char* msg = (idx != ncv_error_num) ? ncv_errors[idx].str : "Unknown error code"; // generic label for codes absent from the table
+    String str = cv::format("%s [Code = %d]", msg, code);
+
+    return str;
+}
+
 
 static void stdDebugOutput(const cv::String &msg)
 {
diff --git a/modules/gpunvidia/src/precomp.hpp b/modules/gpunvidia/src/precomp.hpp
index 613670009..c4e067f52 100644
--- a/modules/gpunvidia/src/precomp.hpp
+++ b/modules/gpunvidia/src/precomp.hpp
@@ -52,5 +52,6 @@
 #include "opencv2/objdetect.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
+#include "opencv2/gpunvidia/private.hpp"
 
 #endif /* __OPENCV_PRECOMP_H__ */

From e65471497387186a36185a4b4aba15af73dbc892 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 9 Apr 2013 12:38:43 +0400
Subject: [PATCH 08/49] replaced NCV_EXPORTS with CV_EXPORTS

---
 .../include/opencv2/gpunvidia/NCV.hpp         |  42 +++----
 .../opencv2/gpunvidia/NCVBroxOpticalFlow.hpp  |   2 +-
 .../gpunvidia/NCVHaarObjectDetection.hpp      |  16 +--
 .../include/opencv2/gpunvidia/NCVPyramid.hpp  |   4 +-
 .../include/opencv2/gpunvidia/NPP_staging.hpp | 108 +++++++++---------
 .../include/opencv2/gpunvidia/private.hpp     |   2 +-
 modules/gpunvidia/src/NCV.cpp                 |   7 --
 7 files changed, 85 insertions(+), 96 deletions(-)

diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCV.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCV.hpp
index e029e1fc3..e993c6408 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NCV.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NCV.hpp
@@ -43,11 +43,7 @@
 #ifndef _ncv_hpp_
 #define _ncv_hpp_
 
-#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS
-    #define NCV_EXPORTS __declspec(dllexport)
-#else
-    #define NCV_EXPORTS
-#endif
+#include "opencv2/core/cvdef.h"
 
 #ifdef _WIN32
     #define WIN32_LEAN_AND_MEAN
@@ -244,13 +240,13 @@ const Ncv32u K_LOG2_WARP_SIZE = 5;
 //==============================================================================
 
 
-NCV_EXPORTS void ncvDebugOutput(const cv::String &msg);
+CV_EXPORTS void ncvDebugOutput(const cv::String &msg);
 
 
 typedef void NCVDebugOutputHandler(const cv::String &msg);
 
 
-NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
+CV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
 
 
 #define ncvAssertPrintCheck(pred, msg) \
@@ -401,11 +397,11 @@ typedef Ncv32u NCVStatus;
 
 typedef struct _NcvTimer *NcvTimer;
 
-NCV_EXPORTS NcvTimer ncvStartTimer(void);
+CV_EXPORTS NcvTimer ncvStartTimer(void);
 
-NCV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);
+CV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);
 
-NCV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
+CV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
 
 
 //==============================================================================
@@ -418,7 +414,7 @@ NCV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
 /**
 * Calculates the aligned top bound value
 */
-NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
+CV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
 
 
 /**
@@ -436,7 +432,7 @@ enum NCVMemoryType
 /**
 * NCVMemPtr
 */
-struct NCV_EXPORTS NCVMemPtr
+struct CV_EXPORTS NCVMemPtr
 {
     void *ptr;
     NCVMemoryType memtype;
@@ -447,7 +443,7 @@ struct NCV_EXPORTS NCVMemPtr
 /**
 * NCVMemSegment
 */
-struct NCV_EXPORTS NCVMemSegment
+struct CV_EXPORTS NCVMemSegment
 {
     NCVMemPtr begin;
     size_t size;
@@ -458,7 +454,7 @@ struct NCV_EXPORTS NCVMemSegment
 /**
 * INCVMemAllocator (Interface)
 */
-class NCV_EXPORTS INCVMemAllocator
+class CV_EXPORTS INCVMemAllocator
 {
 public:
     virtual ~INCVMemAllocator() = 0;
@@ -480,7 +476,7 @@ inline INCVMemAllocator::~INCVMemAllocator() {}
 /**
 * NCVMemStackAllocator
 */
-class NCV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
+class CV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
 {
     NCVMemStackAllocator();
     NCVMemStackAllocator(const NCVMemStackAllocator &);
@@ -517,7 +513,7 @@ private:
 /**
 * NCVMemNativeAllocator
 */
-class NCV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
+class CV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
 {
 public:
 
@@ -549,12 +545,12 @@ private:
 /**
 * Copy dispatchers
 */
-NCV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
+CV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
                                        const void *src, NCVMemoryType srcType,
                                        size_t sz, cudaStream_t cuStream);
 
 
-NCV_EXPORTS NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
+CV_EXPORTS NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
                                          const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
                                          Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
 
@@ -990,23 +986,23 @@ private:
 /**
 * Operations with rectangles
 */
-NCV_EXPORTS NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
+CV_EXPORTS NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
                                               Ncv32u minNeighbors, Ncv32f intersectEps, NCVVector<Ncv32u> *hypothesesWeights);
 
 
-NCV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+CV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
                                            NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
 
 
-NCV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+CV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
                                             NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
 
 
-NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+CV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
                                              NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
 
 
-NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+CV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
                                               NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
 
 
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
index 3300e1006..0634fff8f 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
@@ -92,7 +92,7 @@ struct NCVBroxOpticalFlowDescriptor
 /// \return                        computation status
 /////////////////////////////////////////////////////////////////////////////////////////
 
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
                              INCVMemAllocator &gpu_mem_allocator,
                              const NCVMatrix<Ncv32f> &frame0,
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp
index c067a9135..323c629e6 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp
@@ -355,7 +355,7 @@ enum
 };
 
 
-NCV_EXPORTS NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
+CV_EXPORTS NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
                                                         NcvSize32u srcRoi,
                                                         NCVVector<NcvRect32u> &d_dstRects,
                                                         Ncv32u &dstNumRects,
@@ -382,7 +382,7 @@ NCV_EXPORTS NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcI
 #define HAAR_STDDEV_BORDER                  1
 
 
-NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
+CV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
                                                            NCVMatrix<Ncv32f> &d_weights,
                                                            NCVMatrixAlloc<Ncv32u> &d_pixelMask,
                                                            Ncv32u &numDetections,
@@ -401,7 +401,7 @@ NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_
                                                            cudaStream_t cuStream);
 
 
-NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
+CV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
                                                          NCVMatrix<Ncv32f> &h_weights,
                                                          NCVMatrixAlloc<Ncv32u> &h_pixelMask,
                                                          Ncv32u &numDetections,
@@ -418,7 +418,7 @@ NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_in
 #define RECT_SIMILARITY_PROPORTION      0.2f
 
 
-NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
+CV_EXPORTS NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
                                                      Ncv32u numPixelMaskDetections,
                                                      NCVVector<NcvRect32u> &hypotheses,
                                                      Ncv32u &totalDetections,
@@ -429,7 +429,7 @@ NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMas
                                                      cudaStream_t cuStream);
 
 
-NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
+CV_EXPORTS NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
                                                    Ncv32u numPixelMaskDetections,
                                                    NCVVector<NcvRect32u> &hypotheses,
                                                    Ncv32u &totalDetections,
@@ -439,18 +439,18 @@ NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
                                                    Ncv32f curScale);
 
 
-NCV_EXPORTS NCVStatus ncvHaarGetClassifierSize(const cv::String &filename, Ncv32u &numStages,
+CV_EXPORTS NCVStatus ncvHaarGetClassifierSize(const cv::String &filename, Ncv32u &numStages,
                                                Ncv32u &numNodes, Ncv32u &numFeatures);
 
 
-NCV_EXPORTS NCVStatus ncvHaarLoadFromFile_host(const cv::String &filename,
+CV_EXPORTS NCVStatus ncvHaarLoadFromFile_host(const cv::String &filename,
                                                HaarClassifierCascadeDescriptor &haar,
                                                NCVVector<HaarStage64> &h_HaarStages,
                                                NCVVector<HaarClassifierNode128> &h_HaarNodes,
                                                NCVVector<HaarFeature64> &h_HaarFeatures);
 
 
-NCV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const cv::String &filename,
+CV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const cv::String &filename,
                                              HaarClassifierCascadeDescriptor haar,
                                              NCVVector<HaarStage64> &h_HaarStages,
                                              NCVVector<HaarClassifierNode128> &h_HaarNodes,
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp
index c88dbc271..91972c575 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp
@@ -50,7 +50,7 @@
 #if 0 //def _WIN32
 
 template <class T>
-class NCV_EXPORTS NCVMatrixStack
+class CV_EXPORTS NCVMatrixStack
 {
 public:
     NCVMatrixStack() {this->_arr.clear();}
@@ -71,7 +71,7 @@ private:
 
 
 template <class T>
-class NCV_EXPORTS NCVImagePyramid
+class CV_EXPORTS NCVImagePyramid
 {
 public:
 
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp
index 823be6943..2df393a10 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp
@@ -63,7 +63,7 @@
  * NOT THREAD SAFE
  * \return Current CUDA stream
  */
-NCV_EXPORTS
+CV_EXPORTS
 cudaStream_t nppStGetActiveCUDAstream();
 
 
@@ -73,7 +73,7 @@ cudaStream_t nppStGetActiveCUDAstream();
  * \param cudaStream        [IN] cudaStream CUDA stream to become current
  * \return CUDA stream used before
  */
-NCV_EXPORTS
+CV_EXPORTS
 cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
 
 
@@ -142,7 +142,7 @@ enum NppStInterpMode
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStGetInterpolationBufferSize(NcvSize32u srcSize,
                                            Ncv32u nStep,
                                            Ncv32u *hpSize);
@@ -155,7 +155,7 @@ NCVStatus nppiStGetInterpolationBufferSize(NcvSize32u srcSize,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState);
 
 
@@ -177,7 +177,7 @@ NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState);
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
                                         NcvSize32u srcSize,
                                         Ncv32u nSrcStep,
@@ -210,7 +210,7 @@ NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
                                            NcvSize32u srcSize,
                                            Ncv32u nSrcStep,
@@ -233,7 +233,7 @@ NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStVectorWarpGetBufferSize(NcvSize32u srcSize,
                                         Ncv32u nSrcStep,
                                         Ncv32u *hpSize);
@@ -259,7 +259,7 @@ NCVStatus nppiStVectorWarpGetBufferSize(NcvSize32u srcSize,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
                                          NcvSize32u srcSize,
                                          Ncv32u nSrcStep,
@@ -290,7 +290,7 @@ NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStVectorWarp_PSF2x2_32f_C1(const Ncv32f *pSrc,
                                          NcvSize32u srcSize,
                                          Ncv32u nSrcStep,
@@ -320,7 +320,7 @@ NCVStatus nppiStVectorWarp_PSF2x2_32f_C1(const Ncv32f *pSrc,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStResize_32f_C1R(const Ncv32f *pSrc,
                                NcvSize32u srcSize,
                                Ncv32u nSrcStep,
@@ -347,7 +347,7 @@ NCVStatus nppiStResize_32f_C1R(const Ncv32f *pSrc,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
                                  Ncv32u *d_dst, Ncv32u dstStep,
                                  NcvSize32u srcRoi, Ncv32u scale,
@@ -358,7 +358,7 @@ NCVStatus nppiStDecimate_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
  * \see nppiStDecimate_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
                                  Ncv32s *d_dst, Ncv32u dstStep,
                                  NcvSize32u srcRoi, Ncv32u scale,
@@ -369,7 +369,7 @@ NCVStatus nppiStDecimate_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
  * \see nppiStDecimate_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
                                  Ncv32f *d_dst, Ncv32u dstStep,
                                  NcvSize32u srcRoi, Ncv32u scale,
@@ -380,7 +380,7 @@ NCVStatus nppiStDecimate_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
 * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
 * \see nppiStDecimate_32u_C1R
 */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
                                  Ncv64u *d_dst, Ncv32u dstStep,
                                  NcvSize32u srcRoi, Ncv32u scale,
@@ -391,7 +391,7 @@ NCVStatus nppiStDecimate_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
  * \see nppiStDecimate_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
                                  Ncv64s *d_dst, Ncv32u dstStep,
                                  NcvSize32u srcRoi, Ncv32u scale,
@@ -402,7 +402,7 @@ NCVStatus nppiStDecimate_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
  * \see nppiStDecimate_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
                                  Ncv64f *d_dst, Ncv32u dstStep,
                                  NcvSize32u srcRoi, Ncv32u scale,
@@ -421,7 +421,7 @@ NCVStatus nppiStDecimate_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
                                       Ncv32u *h_dst, Ncv32u dstStep,
                                       NcvSize32u srcRoi, Ncv32u scale);
@@ -431,7 +431,7 @@ NCVStatus nppiStDecimate_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
  * \see nppiStDecimate_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
                                       Ncv32s *h_dst, Ncv32u dstStep,
                                       NcvSize32u srcRoi, Ncv32u scale);
@@ -441,7 +441,7 @@ NCVStatus nppiStDecimate_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
  * \see nppiStDecimate_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
                                       Ncv32f *h_dst, Ncv32u dstStep,
                                       NcvSize32u srcRoi, Ncv32u scale);
@@ -451,7 +451,7 @@ NCVStatus nppiStDecimate_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
  * \see nppiStDecimate_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
                                       Ncv64u *h_dst, Ncv32u dstStep,
                                       NcvSize32u srcRoi, Ncv32u scale);
@@ -461,7 +461,7 @@ NCVStatus nppiStDecimate_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
  * \see nppiStDecimate_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
                                       Ncv64s *h_dst, Ncv32u dstStep,
                                       NcvSize32u srcRoi, Ncv32u scale);
@@ -471,7 +471,7 @@ NCVStatus nppiStDecimate_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
  * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
  * \see nppiStDecimate_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStDecimate_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
                                       Ncv64f *h_dst, Ncv32u dstStep,
                                       NcvSize32u srcRoi, Ncv32u scale);
@@ -493,7 +493,7 @@ NCVStatus nppiStDecimate_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
                                    Ncv64u *d_sqsum, Ncv32u sqsumStep,
                                    Ncv32f *d_norm, Ncv32u normStep,
@@ -516,7 +516,7 @@ NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,
                                         Ncv64u *h_sqsum, Ncv32u sqsumStep,
                                         Ncv32f *h_norm, Ncv32u normStep,
@@ -535,7 +535,7 @@ NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_32u_C1R(Ncv32u *d_src, Ncv32u srcStride,
                                   Ncv32u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -544,7 +544,7 @@ NCVStatus nppiStTranspose_32u_C1R(Ncv32u *d_src, Ncv32u srcStride,
  * Transposes an image. 32-bit signed pixels, single channel
  * \see nppiStTranspose_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_32s_C1R(Ncv32s *d_src, Ncv32u srcStride,
                                   Ncv32s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -553,7 +553,7 @@ NCVStatus nppiStTranspose_32s_C1R(Ncv32s *d_src, Ncv32u srcStride,
  * Transposes an image. 32-bit float pixels, single channel
  * \see nppiStTranspose_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_32f_C1R(Ncv32f *d_src, Ncv32u srcStride,
                                   Ncv32f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -562,7 +562,7 @@ NCVStatus nppiStTranspose_32f_C1R(Ncv32f *d_src, Ncv32u srcStride,
  * Transposes an image. 64-bit unsigned pixels, single channel
  * \see nppiStTranspose_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_64u_C1R(Ncv64u *d_src, Ncv32u srcStride,
                                   Ncv64u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -571,7 +571,7 @@ NCVStatus nppiStTranspose_64u_C1R(Ncv64u *d_src, Ncv32u srcStride,
  * Transposes an image. 64-bit signed pixels, single channel
  * \see nppiStTranspose_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_64s_C1R(Ncv64s *d_src, Ncv32u srcStride,
                                   Ncv64s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -580,7 +580,7 @@ NCVStatus nppiStTranspose_64s_C1R(Ncv64s *d_src, Ncv32u srcStride,
  * Transposes an image. 64-bit float pixels, single channel
  * \see nppiStTranspose_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,
                                   Ncv64f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -589,7 +589,7 @@ NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,
  * Transposes an image. 128-bit pixels of any type, single channel
  * \see nppiStTranspose_32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_128_C1R(void *d_src, Ncv32u srcStep,
                                   void *d_dst, Ncv32u dstStep, NcvSize32u srcRoi);
 
@@ -605,7 +605,7 @@ NCVStatus nppiStTranspose_128_C1R(void *d_src, Ncv32u srcStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStride,
                                        Ncv32u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -614,7 +614,7 @@ NCVStatus nppiStTranspose_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStride,
  * Transposes an image. 32-bit signed pixels, single channel. Host implementation
  * \see nppiStTranspose_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStride,
                                        Ncv32s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -623,7 +623,7 @@ NCVStatus nppiStTranspose_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStride,
  * Transposes an image. 32-bit float pixels, single channel. Host implementation
  * \see nppiStTranspose_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStride,
                                        Ncv32f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -632,7 +632,7 @@ NCVStatus nppiStTranspose_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStride,
  * Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
  * \see nppiStTranspose_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStride,
                                        Ncv64u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -641,7 +641,7 @@ NCVStatus nppiStTranspose_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStride,
  * Transposes an image. 64-bit signed pixels, single channel. Host implementation
  * \see nppiStTranspose_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStride,
                                        Ncv64s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -650,7 +650,7 @@ NCVStatus nppiStTranspose_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStride,
  * Transposes an image. 64-bit float pixels, single channel. Host implementation
  * \see nppiStTranspose_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,
                                        Ncv64f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
 
@@ -659,7 +659,7 @@ NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,
  * Transposes an image. 128-bit pixels of any type, single channel. Host implementation
  * \see nppiStTranspose_32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStTranspose_128_C1R_host(void *d_src, Ncv32u srcStep,
                                        void *d_dst, Ncv32u dstStep, NcvSize32u srcRoi);
 
@@ -673,7 +673,7 @@ NCVStatus nppiStTranspose_128_C1R_host(void *d_src, Ncv32u srcStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
 
 
@@ -681,7 +681,7 @@ NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cuda
  * Calculates the size of the temporary buffer for integral image creation
  * \see nppiStIntegralGetSize_8u32u
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
 
 
@@ -699,7 +699,7 @@ NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cud
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,
                                    Ncv32u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
                                    Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
@@ -709,7 +709,7 @@ NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,
  * Creates an integral image representation for the input image
  * \see nppiStIntegral_8u32u_C1R
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
                                     Ncv32f *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
                                     Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
@@ -726,7 +726,7 @@ NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
                                         Ncv32u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
 
@@ -735,7 +735,7 @@ NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
  * Creates an integral image representation for the input image. Host implementation
  * \see nppiStIntegral_8u32u_C1R_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
                                          Ncv32f *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
 
@@ -749,7 +749,7 @@ NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
 
 
@@ -767,7 +767,7 @@ NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, c
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,
                                       Ncv64u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
                                       Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
@@ -784,7 +784,7 @@ NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
                                            Ncv64u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
 
@@ -806,7 +806,7 @@ NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
 
 
@@ -838,7 +838,7 @@ NCVStatus nppsStCompactGetSize_32f(Ncv32u srcLen, Ncv32u *pBufsize, cudaDevicePr
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,
                             Ncv32u *d_dst, Ncv32u *p_dstLen,
                             Ncv32u elemRemove, Ncv8u *pBuffer,
@@ -849,7 +849,7 @@ NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,
  * Compacts the input vector by removing elements of specified value. 32-bit signed values
  * \see nppsStCompact_32u
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
                             Ncv32s *d_dst, Ncv32u *p_dstLen,
                             Ncv32s elemRemove, Ncv8u *pBuffer,
@@ -860,7 +860,7 @@ NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
  * Compacts the input vector by removing elements of specified value. 32-bit float values
  * \see nppsStCompact_32u
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
                             Ncv32f *d_dst, Ncv32u *p_dstLen,
                             Ncv32f elemRemove, Ncv8u *pBuffer,
@@ -878,7 +878,7 @@ NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
  *
  * \return NCV status code
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
                                  Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove);
 
@@ -887,7 +887,7 @@ NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
  * Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
  * \see nppsStCompact_32u_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
                                  Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove);
 
@@ -896,7 +896,7 @@ NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
  * Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
  * \see nppsStCompact_32u_host
  */
-NCV_EXPORTS
+CV_EXPORTS
 NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
                                  Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove);
 
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp
index e86aaacf3..f23e53604 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp
@@ -75,7 +75,7 @@ namespace cv { namespace gpu
         cudaStream_t oldStream;
     };
 
-    NCV_EXPORTS cv::String getNcvErrorMessage(int code);
+    CV_EXPORTS cv::String getNcvErrorMessage(int code);
 
     static inline void checkNcvError(int err, const char* file, const int line, const char* func)
     {
diff --git a/modules/gpunvidia/src/NCV.cpp b/modules/gpunvidia/src/NCV.cpp
index bf1531d7f..c31f2a3b4 100644
--- a/modules/gpunvidia/src/NCV.cpp
+++ b/modules/gpunvidia/src/NCV.cpp
@@ -755,13 +755,6 @@ static void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThresh
 }
 
 
-//===================================================================
-//
-// Operations with rectangles
-//
-//===================================================================
-
-
 
 NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses,
                                   Ncv32u &numHypotheses,

From ca474de6396f9890fea2c15654cd885de9e935e4 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 9 Apr 2013 12:50:22 +0400
Subject: [PATCH 09/49] made objdetect dependency for gpunvidia optional

---
 modules/gpunvidia/CMakeLists.txt                |  2 +-
 modules/gpunvidia/src/NCV.cpp                   |  8 ++++++++
 .../src/cuda/NCVHaarObjectDetection.cu          | 17 ++++++++++++++++-
 modules/gpunvidia/src/precomp.hpp               |  7 ++++++-
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/modules/gpunvidia/CMakeLists.txt b/modules/gpunvidia/CMakeLists.txt
index 7c1542430..3f4e4f6a6 100644
--- a/modules/gpunvidia/CMakeLists.txt
+++ b/modules/gpunvidia/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Computer Vision (HAL module)")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpunvidia opencv_core opencv_objdetect)
+ocv_define_module(gpunvidia opencv_core OPTIONAL opencv_objdetect)
diff --git a/modules/gpunvidia/src/NCV.cpp b/modules/gpunvidia/src/NCV.cpp
index c31f2a3b4..be82423d0 100644
--- a/modules/gpunvidia/src/NCV.cpp
+++ b/modules/gpunvidia/src/NCV.cpp
@@ -737,6 +737,13 @@ struct RectConvert
 
 static void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights)
 {
+#ifndef HAVE_OPENCV_OBJDETECT
+    (void) hypotheses;
+    (void) groupThreshold;
+    (void) eps;
+    (void) weights;
+    CV_Error(cv::Error::StsNotImplemented, "This functionality requires objdetect module");
+#else
     std::vector<cv::Rect> rects(hypotheses.size());
     std::transform(hypotheses.begin(), hypotheses.end(), rects.begin(), RectConvert());
 
@@ -752,6 +759,7 @@ static void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThresh
     }
     std::transform(rects.begin(), rects.end(), hypotheses.begin(), RectConvert());
     hypotheses.resize(rects.size());
+#endif
 }
 
 
diff --git a/modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu b/modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu
index 9ab0194a4..5296f24a3 100644
--- a/modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu
+++ b/modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu
@@ -61,7 +61,12 @@
 
 #include "opencv2/core/cuda/warp.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
-#include "opencv2/objdetect.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_OBJDETECT
+#  include "opencv2/objdetect.hpp"
+#endif
 
 #include "opencv2/gpunvidia/NCV.hpp"
 #include "opencv2/gpunvidia/NPP_staging.hpp"
@@ -2106,6 +2111,15 @@ static NCVStatus loadFromXML(const cv::String &filename,
                       std::vector<HaarClassifierNode128> &haarClassifierNodes,
                       std::vector<HaarFeature64> &haarFeatures)
 {
+#ifndef HAVE_OPENCV_OBJDETECT
+    (void) filename;
+    (void) haar;
+    (void) haarStages;
+    (void) haarClassifierNodes;
+    (void) haarFeatures;
+    CV_Error(cv::Error::StsNotImplemented, "This functionality requires objdetect module");
+    return NCV_HAAR_XML_LOADING_EXCEPTION;
+#else
     NCVStatus ncvStat;
 
     haar.NumStages = 0;
@@ -2294,6 +2308,7 @@ static NCVStatus loadFromXML(const cv::String &filename,
     }
 
     return NCV_SUCCESS;
+#endif
 }
 
 
diff --git a/modules/gpunvidia/src/precomp.hpp b/modules/gpunvidia/src/precomp.hpp
index c4e067f52..106d0a321 100644
--- a/modules/gpunvidia/src/precomp.hpp
+++ b/modules/gpunvidia/src/precomp.hpp
@@ -49,7 +49,12 @@
 
 #include "opencv2/gpunvidia.hpp"
 #include "opencv2/core/utility.hpp"
-#include "opencv2/objdetect.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_OBJDETECT
+#  include "opencv2/objdetect.hpp"
+#endif
 
 #include "opencv2/core/gpu_private.hpp"
 #include "opencv2/gpunvidia/private.hpp"

From d08ebfe4d315fb0446ebe15aba920c54c5197315 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 9 Apr 2013 13:34:54 +0400
Subject: [PATCH 10/49] moved rectStdDev to gpuarithm

---
 modules/gpu/include/opencv2/gpu.hpp           |  5 ---
 modules/gpu/src/imgproc.cpp                   | 31 -------------------
 .../gpuarithm/include/opencv2/gpuarithm.hpp   |  5 +++
 modules/gpuarithm/src/matrix_reductions.cpp   | 31 +++++++++++++++++++
 4 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 0c625de8a..6605a5840 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -184,11 +184,6 @@ CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer,
 //! supports source images of 8UC1 type only
 CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());
 
-//! computes the standard deviation of integral images
-//! supports only CV_32SC1 source type and CV_32FC1 sqr type
-//! output will have CV_32FC1 type
-CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
-
 //! computes Harris cornerness criteria at each image pixel
 CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
 CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index fa0ed03a7..a3c9694fa 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -59,7 +59,6 @@ void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int,
 void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_no_cuda(); }
 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
@@ -628,36 +627,6 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
         cudaSafeCall( cudaDeviceSynchronize() );
 }
 
-//////////////////////////////////////////////////////////////////////////////
-// rectStdDev
-
-void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
-{
-    CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1);
-
-    dst.create(src.size(), CV_32FC1);
-
-    NppiSize sz;
-    sz.width = src.cols;
-    sz.height = src.rows;
-
-    NppiRect nppRect;
-    nppRect.height = rect.height;
-    nppRect.width = rect.width;
-    nppRect.x = rect.x;
-    nppRect.y = rect.y;
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    NppStreamHandler h(stream);
-
-    nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp64f>(), static_cast<int>(sqr.step),
-                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );
-
-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
-}
-
 
 ////////////////////////////////////////////////////////////////////////
 // Histogram
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
index 57d9abfbf..5724cd53c 100644
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -274,6 +274,11 @@ CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, in
 //! applies fixed threshold to the image
 CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
 
+//! computes the standard deviation of integral images
+//! supports only CV_32SC1 source type and CV_64FC1 sqr type
+//! output will have CV_32FC1 type
+CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
+
 }} // namespace cv { namespace gpu {
 
 #endif /* __OPENCV_GPUARITHM_HPP__ */
diff --git a/modules/gpuarithm/src/matrix_reductions.cpp b/modules/gpuarithm/src/matrix_reductions.cpp
index 027618dac..dbb6c0945 100644
--- a/modules/gpuarithm/src/matrix_reductions.cpp
+++ b/modules/gpuarithm/src/matrix_reductions.cpp
@@ -69,6 +69,7 @@ void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const G
 int cv::gpu::countNonZero(const GpuMat&) { throw_no_cuda(); return 0; }
 int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_no_cuda(); return 0; }
 void cv::gpu::reduce(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -696,4 +697,34 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
     }
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// rectStdDev
+
+void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
+{
+    CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1);
+
+    dst.create(src.size(), CV_32FC1);
+
+    NppiSize sz;
+    sz.width = src.cols;
+    sz.height = src.rows;
+
+    NppiRect nppRect;
+    nppRect.height = rect.height;
+    nppRect.width = rect.width;
+    nppRect.x = rect.x;
+    nppRect.y = rect.y;
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStreamHandler h(stream);
+
+    nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp64f>(), static_cast<int>(sqr.step),
+                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );
+
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
+}
+
 #endif

From 10ac854358c5a1418959e19f901e5993a73c49f5 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 9 Apr 2013 13:53:22 +0400
Subject: [PATCH 11/49] moved copyMakeBorder to gpuarithm module

---
 modules/gpu/include/opencv2/gpu.hpp           |   4 -
 modules/gpu/perf/perf_imgproc.cpp             |  40 -------
 modules/gpu/src/imgproc.cpp                   | 110 ------------------
 modules/gpu/test/test_copy_make_border.cpp    | 106 -----------------
 modules/gpuarithm/CMakeLists.txt              |   2 +-
 .../gpuarithm/include/opencv2/gpuarithm.hpp   |   4 +
 modules/gpuarithm/perf/perf_core.cpp          |  44 +++++++
 modules/gpuarithm/perf/perf_precomp.hpp       |   6 +
 modules/gpuarithm/src/arithm.cpp              | 110 ++++++++++++++++++
 .../src/cuda/copy_make_border.cu              |   0
 modules/gpuarithm/test/test_core.cpp          |  64 ++++++++++
 modules/gpuarithm/test/test_precomp.hpp       |   6 +
 12 files changed, 235 insertions(+), 261 deletions(-)
 delete mode 100644 modules/gpu/test/test_copy_make_border.cpp
 rename modules/{gpu => gpuarithm}/src/cuda/copy_make_border.cu (100%)

diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 6605a5840..642c327a3 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -168,10 +168,6 @@ CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K
 CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
                        int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
 
-//! copies 2D array to a larger destination array and pads borders with user-specifiable constant
-CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType,
-                               const Scalar& value = Scalar(), Stream& stream = Stream::Null());
-
 //! computes the integral image
 //! sum will have CV_32S type, but will contain unsigned int values
 //! supports only CV_8UC1 source type
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp
index 9a1168a52..02a1e6cd4 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -325,46 +325,6 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// CopyMakeBorder
-
-DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode);
-
-PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4,
-                    ALL_BORDER_MODES))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int borderMode = GET_PARAM(3);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::copyMakeBorder(d_src, dst, 5, 5, 5, 5, borderMode);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderMode);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // Threshold
 
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index a3c9694fa..54a693700 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -51,7 +51,6 @@ void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria,
 void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
 void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
@@ -235,115 +234,6 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q,
     funcs[dst_cn == 4][disp.type()](disp, xyz, Q.ptr<float>(), StreamAccessor::getStream(stream));
 }
 
-////////////////////////////////////////////////////////////////////////
-// copyMakeBorder
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
-    }
-}}}
-
-namespace
-{
-    template <typename T, int cn> void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
-    {
-        using namespace ::cv::gpu::cudev::imgproc;
-
-        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
-
-        copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);
-    }
-}
-
-#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__  > 4
-typedef Npp32s __attribute__((__may_alias__)) Npp32s_a;
-#else
-typedef Npp32s Npp32s_a;
-#endif
-
-void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)
-{
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
-
-    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
-    {
-        NppiSize srcsz;
-        srcsz.width  = src.cols;
-        srcsz.height = src.rows;
-
-        NppiSize dstsz;
-        dstsz.width  = dst.cols;
-        dstsz.height = dst.rows;
-
-        NppStreamHandler h(stream);
-
-        switch (src.type())
-        {
-        case CV_8UC1:
-            {
-                Npp8u nVal = saturate_cast<Npp8u>(value[0]);
-                nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
-                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
-                break;
-            }
-        case CV_8UC4:
-            {
-                Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};
-                nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
-                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
-                break;
-            }
-        case CV_32SC1:
-            {
-                Npp32s nVal = saturate_cast<Npp32s>(value[0]);
-                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
-                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
-                break;
-            }
-        case CV_32FC1:
-            {
-                Npp32f val = saturate_cast<Npp32f>(value[0]);
-                Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
-                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
-                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
-                break;
-            }
-        }
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else
-    {
-        typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
-        static const caller_t callers[6][4] =
-        {
-            {   copyMakeBorder_caller<uchar, 1>  ,    copyMakeBorder_caller<uchar, 2>   ,    copyMakeBorder_caller<uchar, 3>  ,    copyMakeBorder_caller<uchar, 4>},
-            {0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},
-            {   copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/,    copyMakeBorder_caller<ushort, 3> ,    copyMakeBorder_caller<ushort, 4>},
-            {   copyMakeBorder_caller<short, 1>  , 0/*copyMakeBorder_caller<short, 2>*/ ,    copyMakeBorder_caller<short, 3>  ,    copyMakeBorder_caller<short, 4>},
-            {0/*copyMakeBorder_caller<int,   1>*/, 0/*copyMakeBorder_caller<int,   2>*/ , 0/*copyMakeBorder_caller<int,   3>*/, 0/*copyMakeBorder_caller<int  , 4>*/},
-            {   copyMakeBorder_caller<float, 1>  , 0/*copyMakeBorder_caller<float, 2>*/ ,    copyMakeBorder_caller<float, 3>  ,    copyMakeBorder_caller<float ,4>}
-        };
-
-        caller_t func = callers[src.depth()][src.channels() - 1];
-        CV_Assert(func != 0);
-
-        int gpuBorderType;
-        CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
-
-        func(src, dst, top, left, gpuBorderType, value, stream);
-    }
-}
-
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpPlaneMaps
 
diff --git a/modules/gpu/test/test_copy_make_border.cpp b/modules/gpu/test/test_copy_make_border.cpp
deleted file mode 100644
index 24a75c023..000000000
--- a/modules/gpu/test/test_copy_make_border.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_CUDA
-
-using namespace cvtest;
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(Border, int)
-}
-
-PARAM_TEST_CASE(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, Border, BorderType, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    int border;
-    int borderType;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-        border = GET_PARAM(3);
-        borderType = GET_PARAM(4);
-        useRoi = GET_PARAM(5);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(CopyMakeBorder, Accuracy)
-{
-    cv::Mat src = randomMat(size, type);
-    cv::Scalar val = randomScalar(0, 255);
-
-    cv::gpu::GpuMat dst = createMat(cv::Size(size.width + 2 * border, size.height + 2 * border), type, useRoi);
-    cv::gpu::copyMakeBorder(loadMat(src, useRoi), dst, border, border, border, border, borderType, val);
-
-    cv::Mat dst_gold;
-    cv::copyMakeBorder(src, dst_gold, border, border, border, border, borderType, val);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1),
-                    MatType(CV_8UC3),
-                    MatType(CV_8UC4),
-                    MatType(CV_16UC1),
-                    MatType(CV_16UC3),
-                    MatType(CV_16UC4),
-                    MatType(CV_32FC1),
-                    MatType(CV_32FC3),
-                    MatType(CV_32FC4)),
-    testing::Values(Border(1), Border(10), Border(50)),
-    ALL_BORDER_TYPES,
-    WHOLE_SUBMAT));
-
-#endif // HAVE_CUDA
diff --git a/modules/gpuarithm/CMakeLists.txt b/modules/gpuarithm/CMakeLists.txt
index 99a6fcce1..04a6b2cc2 100644
--- a/modules/gpuarithm/CMakeLists.txt
+++ b/modules/gpuarithm/CMakeLists.txt
@@ -6,7 +6,7 @@ set(the_description "GPU-accelerated Operations on Matrices")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpuarithm opencv_core)
+ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_imgproc)
 
 if(HAVE_CUBLAS)
   CUDA_ADD_CUBLAS_TO_TARGET(${the_module})
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
index 5724cd53c..03458ea04 100644
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -279,6 +279,10 @@ CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, doubl
 //! output will have CV_32FC1 type
 CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
 
+//! copies 2D array to a larger destination array and pads borders with user-specifiable constant
+CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType,
+                               const Scalar& value = Scalar(), Stream& stream = Stream::Null());
+
 }} // namespace cv { namespace gpu {
 
 #endif /* __OPENCV_GPUARITHM_HPP__ */
diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp
index 829637f4e..a9a6e360b 100644
--- a/modules/gpuarithm/perf/perf_core.cpp
+++ b/modules/gpuarithm/perf/perf_core.cpp
@@ -2155,3 +2155,47 @@ PERF_TEST_P(Sz_Depth_NormType, Core_Normalize,
         CPU_SANITY_CHECK(dst);
     }
 }
+
+//////////////////////////////////////////////////////////////////////
+// CopyMakeBorder
+
+#ifdef HAVE_OPENCV_IMGPROC
+
+DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode);
+
+PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    ALL_BORDER_MODES))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int borderMode = GET_PARAM(3);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::copyMakeBorder(d_src, dst, 5, 5, 5, 5, borderMode);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderMode);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+#endif
diff --git a/modules/gpuarithm/perf/perf_precomp.hpp b/modules/gpuarithm/perf/perf_precomp.hpp
index 06bc20b9b..bee378064 100644
--- a/modules/gpuarithm/perf/perf_precomp.hpp
+++ b/modules/gpuarithm/perf/perf_precomp.hpp
@@ -57,6 +57,12 @@
 #include "opencv2/core.hpp"
 #include "opencv2/gpuarithm.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_IMGPROC
+#  include "opencv2/imgproc.hpp"
+#endif
+
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
 #endif
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index d452e3ae7..40242876d 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -60,6 +60,7 @@ void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool,
 void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
 void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&) { throw_no_cuda(); }
 void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -608,4 +609,113 @@ void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+// copyMakeBorder
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
+    }
+}}}
+
+namespace
+{
+    template <typename T, int cn> void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
+    {
+        using namespace ::cv::gpu::cudev::imgproc;
+
+        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
+
+        copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);
+    }
+}
+
+#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__  > 4
+typedef Npp32s __attribute__((__may_alias__)) Npp32s_a;
+#else
+typedef Npp32s Npp32s_a;
+#endif
+
+void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)
+{
+    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
+    CV_Assert(borderType == IPL_BORDER_REFLECT_101 || borderType == IPL_BORDER_REPLICATE || borderType == IPL_BORDER_CONSTANT || borderType == IPL_BORDER_REFLECT || borderType == IPL_BORDER_WRAP);
+
+    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    if (borderType == IPL_BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
+    {
+        NppiSize srcsz;
+        srcsz.width  = src.cols;
+        srcsz.height = src.rows;
+
+        NppiSize dstsz;
+        dstsz.width  = dst.cols;
+        dstsz.height = dst.rows;
+
+        NppStreamHandler h(stream);
+
+        switch (src.type())
+        {
+        case CV_8UC1:
+            {
+                Npp8u nVal = saturate_cast<Npp8u>(value[0]);
+                nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
+                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
+                break;
+            }
+        case CV_8UC4:
+            {
+                Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};
+                nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
+                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
+                break;
+            }
+        case CV_32SC1:
+            {
+                Npp32s nVal = saturate_cast<Npp32s>(value[0]);
+                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
+                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
+                break;
+            }
+        case CV_32FC1:
+            {
+                Npp32f val = saturate_cast<Npp32f>(value[0]);
+                Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
+                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
+                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
+                break;
+            }
+        }
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+    else
+    {
+        typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
+        static const caller_t callers[6][4] =
+        {
+            {   copyMakeBorder_caller<uchar, 1>  ,    copyMakeBorder_caller<uchar, 2>   ,    copyMakeBorder_caller<uchar, 3>  ,    copyMakeBorder_caller<uchar, 4>},
+            {0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},
+            {   copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/,    copyMakeBorder_caller<ushort, 3> ,    copyMakeBorder_caller<ushort, 4>},
+            {   copyMakeBorder_caller<short, 1>  , 0/*copyMakeBorder_caller<short, 2>*/ ,    copyMakeBorder_caller<short, 3>  ,    copyMakeBorder_caller<short, 4>},
+            {0/*copyMakeBorder_caller<int,   1>*/, 0/*copyMakeBorder_caller<int,   2>*/ , 0/*copyMakeBorder_caller<int,   3>*/, 0/*copyMakeBorder_caller<int  , 4>*/},
+            {   copyMakeBorder_caller<float, 1>  , 0/*copyMakeBorder_caller<float, 2>*/ ,    copyMakeBorder_caller<float, 3>  ,    copyMakeBorder_caller<float ,4>}
+        };
+
+        caller_t func = callers[src.depth()][src.channels() - 1];
+        CV_Assert(func != 0);
+
+        int gpuBorderType;
+        CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
+
+        func(src, dst, top, left, gpuBorderType, value, stream);
+    }
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpu/src/cuda/copy_make_border.cu b/modules/gpuarithm/src/cuda/copy_make_border.cu
similarity index 100%
rename from modules/gpu/src/cuda/copy_make_border.cu
rename to modules/gpuarithm/src/cuda/copy_make_border.cu
diff --git a/modules/gpuarithm/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp
index b2072c25f..613d7122a 100644
--- a/modules/gpuarithm/test/test_core.cpp
+++ b/modules/gpuarithm/test/test_core.cpp
@@ -3607,4 +3607,68 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Normalize, testing::Combine(
     testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF), NormCode(cv::NORM_MINMAX)),
     WHOLE_SUBMAT));
 
+//////////////////////////////////////////////////////////////////////////////
+// CopyMakeBorder
+
+#ifdef HAVE_OPENCV_IMGPROC
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(Border, int)
+}
+
+PARAM_TEST_CASE(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, Border, BorderType, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    int border;
+    int borderType;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        border = GET_PARAM(3);
+        borderType = GET_PARAM(4);
+        useRoi = GET_PARAM(5);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(CopyMakeBorder, Accuracy)
+{
+    cv::Mat src = randomMat(size, type);
+    cv::Scalar val = randomScalar(0, 255);
+
+    cv::gpu::GpuMat dst = createMat(cv::Size(size.width + 2 * border, size.height + 2 * border), type, useRoi);
+    cv::gpu::copyMakeBorder(loadMat(src, useRoi), dst, border, border, border, border, borderType, val);
+
+    cv::Mat dst_gold;
+    cv::copyMakeBorder(src, dst_gold, border, border, border, border, borderType, val);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatType(CV_8UC1),
+                    MatType(CV_8UC3),
+                    MatType(CV_8UC4),
+                    MatType(CV_16UC1),
+                    MatType(CV_16UC3),
+                    MatType(CV_16UC4),
+                    MatType(CV_32FC1),
+                    MatType(CV_32FC3),
+                    MatType(CV_32FC4)),
+    testing::Values(Border(1), Border(10), Border(50)),
+    ALL_BORDER_TYPES,
+    WHOLE_SUBMAT));
+
+#endif
+
 #endif // HAVE_CUDA
diff --git a/modules/gpuarithm/test/test_precomp.hpp b/modules/gpuarithm/test/test_precomp.hpp
index 089627758..800ed31c0 100644
--- a/modules/gpuarithm/test/test_precomp.hpp
+++ b/modules/gpuarithm/test/test_precomp.hpp
@@ -57,4 +57,10 @@
 #include "opencv2/core.hpp"
 #include "opencv2/gpuarithm.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_IMGPROC
+#  include "opencv2/imgproc.hpp"
+#endif
+
 #endif

From c56bdbc1c5768dfb76eccb531268b20f5ebe262f Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 9 Apr 2013 14:18:18 +0400
Subject: [PATCH 12/49] moved integral to gpuarithm module

---
 modules/gpu/include/opencv2/gpu.hpp           |  12 --
 modules/gpu/perf/perf_imgproc.cpp             |  57 ---------
 modules/gpu/src/imgproc.cpp                   | 108 ----------------
 modules/gpu/test/test_imgproc.cpp             |  37 ------
 modules/gpuarithm/CMakeLists.txt              |   2 +-
 .../gpuarithm/include/opencv2/gpuarithm.hpp   |  12 ++
 modules/gpuarithm/perf/perf_core.cpp          |  61 ++++++++-
 modules/gpuarithm/src/arithm.cpp              | 118 ++++++++++++++++++
 .../src/cuda/integral.cu}                     |   0
 modules/gpuarithm/src/precomp.hpp             |   7 ++
 modules/gpuarithm/test/test_core.cpp          |  41 +++++-
 11 files changed, 236 insertions(+), 219 deletions(-)
 rename modules/{gpu/src/cuda/integral_image.cu => gpuarithm/src/cuda/integral.cu} (100%)

diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 642c327a3..ead3ab333 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -168,18 +168,6 @@ CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K
 CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
                        int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
 
-//! computes the integral image
-//! sum will have CV_32S type, but will contain unsigned int values
-//! supports only CV_8UC1 source type
-CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null());
-//! buffered version
-CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null());
-
-//! computes squared integral image
-//! result matrix will have 64F type, but will contain 64U values
-//! supports source images of 8UC1 type only
-CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());
-
 //! computes Harris cornerness criteria at each image pixel
 CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
 CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp
index 02a1e6cd4..d26bb844b 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -363,63 +363,6 @@ PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// Integral
-
-PERF_TEST_P(Sz, ImgProc_Integral,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_buf;
-
-        TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::integral(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// IntegralSqr
-
-PERF_TEST_P(Sz, ImgProc_IntegralSqr,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // HistEvenC1
 
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 54a693700..3a967fb35 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -55,9 +55,6 @@ void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&,
 void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
@@ -412,111 +409,6 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
     funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
 }
 
-////////////////////////////////////////////////////////////////////////
-// integral
-
-void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)
-{
-    GpuMat buffer;
-    integralBuffered(src, sum, buffer, s);
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz<unsigned int> integral, cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)
-{
-    CV_Assert(src.type() == CV_8UC1);
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    cv::Size whole;
-    cv::Point offset;
-
-    src.locateROI(whole, offset);
-
-    if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
-        && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
-    {
-        ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
-
-        cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
-
-        sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
-        if (s)
-            s.enqueueMemSet(sum, Scalar::all(0));
-        else
-            sum.setTo(Scalar::all(0));
-
-        GpuMat inner = sum(Rect(1, 1, src.cols, src.rows));
-        GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));
-
-        if (s)
-            s.enqueueCopy(res, inner);
-        else
-            res.copyTo(inner);
-    }
-    else
-    {
-        sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
-
-        NcvSize32u roiSize;
-        roiSize.width = src.cols;
-        roiSize.height = src.rows;
-
-        cudaDeviceProp prop;
-        cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
-
-        Ncv32u bufSize;
-        ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
-        ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
-
-
-        NppStStreamHandler h(stream);
-
-        ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),
-            sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// sqrIntegral
-
-void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
-{
-    CV_Assert(src.type() == CV_8U);
-
-    NcvSize32u roiSize;
-    roiSize.width = src.cols;
-    roiSize.height = src.rows;
-
-    cudaDeviceProp prop;
-    cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
-
-    Ncv32u bufSize;
-    ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
-    GpuMat buf(1, bufSize, CV_8U);
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    NppStStreamHandler h(stream);
-
-    sqsum.create(src.rows + 1, src.cols + 1, CV_64F);
-    ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step),
-            sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
-
-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
-}
-
 
 ////////////////////////////////////////////////////////////////////////
 // Histogram
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
index a38f27b74..ffc413ee0 100644
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -46,43 +46,6 @@
 
 using namespace cvtest;
 
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// Integral
-
-PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, cv::Size, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Integral, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_8UC1);
-
-    cv::gpu::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);
-    cv::gpu::integral(loadMat(src, useRoi), dst);
-
-    cv::Mat dst_gold;
-    cv::integral(src, dst_gold, CV_32S);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Integral, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    WHOLE_SUBMAT));
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // HistEven
 
diff --git a/modules/gpuarithm/CMakeLists.txt b/modules/gpuarithm/CMakeLists.txt
index 04a6b2cc2..75cab4b31 100644
--- a/modules/gpuarithm/CMakeLists.txt
+++ b/modules/gpuarithm/CMakeLists.txt
@@ -6,7 +6,7 @@ set(the_description "GPU-accelerated Operations on Matrices")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_imgproc)
+ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_gpunvidia opencv_imgproc)
 
 if(HAVE_CUBLAS)
   CUDA_ADD_CUBLAS_TO_TARGET(${the_module})
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
index 03458ea04..8829e43a7 100644
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -283,6 +283,18 @@ CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, co
 CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType,
                                const Scalar& value = Scalar(), Stream& stream = Stream::Null());
 
+//! computes the integral image
+//! sum will have CV_32S type, but will contain unsigned int values
+//! supports only CV_8UC1 source type
+CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null());
+//! buffered version
+CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null());
+
+//! computes squared integral image
+//! result matrix will have 64F type, but will contain 64U values
+//! supports source images of 8UC1 type only
+CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());
+
 }} // namespace cv { namespace gpu {
 
 #endif /* __OPENCV_GPUARITHM_HPP__ */
diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp
index a9a6e360b..603d2448e 100644
--- a/modules/gpuarithm/perf/perf_core.cpp
+++ b/modules/gpuarithm/perf/perf_core.cpp
@@ -2156,11 +2156,11 @@ PERF_TEST_P(Sz_Depth_NormType, Core_Normalize,
     }
 }
 
+#ifdef HAVE_OPENCV_IMGPROC
+
 //////////////////////////////////////////////////////////////////////
 // CopyMakeBorder
 
-#ifdef HAVE_OPENCV_IMGPROC
-
 DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode);
 
 PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder,
@@ -2198,4 +2198,61 @@ PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder,
     }
 }
 
+//////////////////////////////////////////////////////////////////////
+// Integral
+
+PERF_TEST_P(Sz, ImgProc_Integral,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src(size, CV_8UC1);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+        cv::gpu::GpuMat d_buf;
+
+        TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::integral(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// IntegralSqr
+
+PERF_TEST_P(Sz, ImgProc_IntegralSqr,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src(size, CV_8UC1);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU();
+    }
+}
+
 #endif
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index 40242876d..baf598d96 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -61,6 +61,9 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool,
 void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&) { throw_no_cuda(); }
 void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
 void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_no_cuda(); }
+void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -718,4 +721,119 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+// integral: convenience wrapper that forwards to integralBuffered with its own temporary buffer
+
+void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)
+{
+    GpuMat buffer;  // local scratch matrix, so nothing is reused between calls
+    integralBuffered(src, sum, buffer, s);  // src, sum and s are passed through unchanged
+}
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz<unsigned int> integral, cudaStream_t stream);  // declaration only; the definition is not in this hunk (presumably a .cu file - confirm)
+    }
+}}}
+
+void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)  // writes the integral of src into sum; buffer is caller-provided scratch space
+{
+    CV_Assert(src.type() == CV_8UC1);  // only 8-bit single-channel input is accepted
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    cv::Size whole;
+    cv::Point offset;
+
+    src.locateROI(whole, offset);  // of the results, only offset.x is consulted below
+
+    if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048  // shuffle path requires: device feature, width <= 2048,
+        && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))  // offset.x multiple of 16, and width rounded up to 64 not exceeding step - offset.x
+    {
+        ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);  // rows rounded up to a multiple of 8, cols to a multiple of 64
+
+        cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);  // result lands in the padded buffer, not directly in sum
+
+        sum.create(src.rows + 1, src.cols + 1, CV_32SC1);  // one extra row and one extra column compared to src
+        if (s)
+            s.enqueueMemSet(sum, Scalar::all(0));  // zero-fill through the stream
+        else
+            sum.setTo(Scalar::all(0));  // zero-fill without the stream
+
+        GpuMat inner = sum(Rect(1, 1, src.cols, src.rows));  // sum without its first row and first column
+        GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));  // src-sized top-left region of the padded buffer
+
+        if (s)
+            s.enqueueCopy(res, inner);  // copy through the stream
+        else
+            res.copyTo(inner);  // copy without the stream
+    }
+    else
+    {
+#ifndef HAVE_OPENCV_GPUNVIDIA  // without this macro the fallback path is unavailable and only throw_no_cuda() is called
+    throw_no_cuda();
+#else
+        sum.create(src.rows + 1, src.cols + 1, CV_32SC1);  // same output size and type as the shuffle path
+
+        NcvSize32u roiSize;
+        roiSize.width = src.cols;
+        roiSize.height = src.rows;
+
+        cudaDeviceProp prop;
+        cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );  // properties of the current device, passed to both nppiSt calls
+
+        Ncv32u bufSize;
+        ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );  // fills bufSize for this ROI size
+        ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);  // here buffer is a single row of bufSize bytes (different layout than the shuffle path)
+
+        NppStStreamHandler h(stream);  // NOTE(review): presumably scopes the NPP_staging stream to `stream` - confirm
+
+        ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),  // const_cast: the call takes a non-const source pointer
+            sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );  // sum was created as CV_32SC1 but is accessed as Ncv32u here
+
+        if (stream == 0)  // null stream: synchronize the device before returning
+            cudaSafeCall( cudaDeviceSynchronize() );
+#endif
+    }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// sqrIntegral: squared integral of an 8-bit image via nppiStSqrIntegral_8u64u_C1R (needs HAVE_OPENCV_GPUNVIDIA)
+
+void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
+{
+#ifndef HAVE_OPENCV_GPUNVIDIA  // without this macro the function only calls throw_no_cuda()
+    (void) src;  // the (void) casts mark the parameters as used in this branch
+    (void) sqsum;
+    (void) s;
+    throw_no_cuda();
+#else
+    CV_Assert(src.type() == CV_8U);  // input type check; no other type is accepted
+
+    NcvSize32u roiSize;
+    roiSize.width = src.cols;
+    roiSize.height = src.rows;
+
+    cudaDeviceProp prop;
+    cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );  // properties of the current device, passed to both nppiSt calls
+
+    Ncv32u bufSize;
+    ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));  // fills bufSize for this ROI size
+    GpuMat buf(1, bufSize, CV_8U);  // scratch buffer is a fresh local on every call; there is no caller-provided buffer here
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    NppStStreamHandler h(stream);  // NOTE(review): presumably scopes the NPP_staging stream to `stream` - confirm
+
+    sqsum.create(src.rows + 1, src.cols + 1, CV_64F);  // one extra row and column; created as CV_64F but accessed as Ncv64u below
+    ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step),  // const_cast: the call takes a non-const source pointer
+            sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
+
+    if (stream == 0)  // null stream: synchronize the device before returning
+        cudaSafeCall( cudaDeviceSynchronize() );
+#endif
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpu/src/cuda/integral_image.cu b/modules/gpuarithm/src/cuda/integral.cu
similarity index 100%
rename from modules/gpu/src/cuda/integral_image.cu
rename to modules/gpuarithm/src/cuda/integral.cu
diff --git a/modules/gpuarithm/src/precomp.hpp b/modules/gpuarithm/src/precomp.hpp
index 7d36adb46..6e21684aa 100644
--- a/modules/gpuarithm/src/precomp.hpp
+++ b/modules/gpuarithm/src/precomp.hpp
@@ -51,6 +51,13 @@
 
 #include "opencv2/core/gpu_private.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUNVIDIA
+#  include "opencv2/gpunvidia.hpp"
+#  include "opencv2/gpunvidia/private.hpp"
+#endif
+
 #ifdef HAVE_CUBLAS
     #include <cublas.h>
 #endif
diff --git a/modules/gpuarithm/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp
index 613d7122a..36c155480 100644
--- a/modules/gpuarithm/test/test_core.cpp
+++ b/modules/gpuarithm/test/test_core.cpp
@@ -3607,11 +3607,11 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Normalize, testing::Combine(
     testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF), NormCode(cv::NORM_MINMAX)),
     WHOLE_SUBMAT));
 
+#ifdef HAVE_OPENCV_IMGPROC
+
 //////////////////////////////////////////////////////////////////////////////
 // CopyMakeBorder
 
-#ifdef HAVE_OPENCV_IMGPROC
-
 namespace
 {
     IMPLEMENT_PARAM_CLASS(Border, int)
@@ -3669,6 +3669,43 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine(
     ALL_BORDER_TYPES,
     WHOLE_SUBMAT));
 
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// Integral: fixture parameterized by device, image size and UseRoi flag
+
+PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);  // parameter order matches the PARAM_TEST_CASE type list above
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());  // select the parameterized device during SetUp
+    }
+};
+
+GPU_TEST_P(Integral, Accuracy)  // compares cv::gpu::integral with cv::integral on random 8-bit data
+{
+    cv::Mat src = randomMat(size, CV_8UC1);
+
+    cv::gpu::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);  // destination is pre-created one row and column larger than src
+    cv::gpu::integral(loadMat(src, useRoi), dst);
+
+    cv::Mat dst_gold;
+    cv::integral(src, dst_gold, CV_32S);  // CPU reference with 32-bit signed sum depth
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);  // tolerance 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Integral, testing::Combine(  // instantiates Integral for every combination of the three generators below
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    WHOLE_SUBMAT));
+
 #endif
 
 #endif // HAVE_CUDA

From d569e72ad43ecd31c855f0c0d04b75ebed2ff84f Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 9 Apr 2013 15:49:56 +0400
Subject: [PATCH 13/49] moved mulSpectrums, dft and convolve to gpuarithm

---
 modules/gpu/CMakeLists.txt                    |   6 -
 modules/gpu/include/opencv2/gpu.hpp           |  43 --
 modules/gpu/perf/perf_imgproc.cpp             | 150 -------
 modules/gpu/src/cuda/imgproc.cu               | 113 -----
 modules/gpu/src/cuda/safe_call.hpp            |  15 -
 modules/gpu/src/error.cpp                     |  62 ---
 modules/gpu/src/imgproc.cpp                   | 299 -------------
 modules/gpu/test/test_imgproc.cpp             | 272 ------------
 modules/gpuarithm/CMakeLists.txt              |   4 +
 .../gpuarithm/include/opencv2/gpuarithm.hpp   |  43 ++
 modules/gpuarithm/perf/perf_core.cpp          | 150 +++++++
 modules/gpuarithm/src/arithm.cpp              | 401 ++++++++++++++++--
 modules/gpuarithm/src/cuda/mul_spectrums.cu   | 171 ++++++++
 modules/gpuarithm/src/precomp.hpp             |   6 +-
 modules/gpuarithm/test/test_core.cpp          | 272 ++++++++++++
 15 files changed, 1014 insertions(+), 993 deletions(-)
 create mode 100644 modules/gpuarithm/src/cuda/mul_spectrums.cu

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index ecad9e4dd..55fc1007e 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -48,12 +48,6 @@ ocv_set_module_sources(
 
 ocv_create_module(${cuda_link_libs})
 
-if(HAVE_CUDA)
-  if(HAVE_CUFFT)
-    CUDA_ADD_CUFFT_TO_TARGET(${the_module})
-  endif()
-endif()
-
 ocv_add_precompiled_headers(${the_module})
 
 ################################################################################################################
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index ead3ab333..19fd7c93e 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -180,49 +180,6 @@ CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, Gp
 CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
     int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
 
-//! performs per-element multiplication of two full (not packed) Fourier spectrums
-//! supports 32FC2 matrixes only (interleaved format)
-CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream = Stream::Null());
-
-//! performs per-element multiplication of two full (not packed) Fourier spectrums
-//! supports 32FC2 matrixes only (interleaved format)
-CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null());
-
-//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
-//! Param dft_size is the size of DFT transform.
-//!
-//! If the source matrix is not continous, then additional copy will be done,
-//! so to avoid copying ensure the source matrix is continous one. If you want to use
-//! preallocated output ensure it is continuous too, otherwise it will be reallocated.
-//!
-//! Being implemented via CUFFT real-to-complex transform result contains only non-redundant values
-//! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved.
-//!
-//! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format.
-CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
-
-struct CV_EXPORTS ConvolveBuf
-{
-    Size result_size;
-    Size block_size;
-    Size user_block_size;
-    Size dft_size;
-    int spect_len;
-
-    GpuMat image_spect, templ_spect, result_spect;
-    GpuMat image_block, templ_block, result_data;
-
-    void create(Size image_size, Size templ_size);
-    static Size estimateBlockSize(Size result_size, Size templ_size);
-};
-
-
-//! computes convolution (or cross-correlation) of two images using discrete Fourier transform
-//! supports source images of 32FC1 type only
-//! result matrix will have 32FC1 type
-CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr = false);
-CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null());
-
 struct CV_EXPORTS MatchTemplateBuf
 {
     Size user_block_size;
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp
index d26bb844b..5f8e9b297 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -718,54 +718,6 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_BlendLinear,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// Convolve
-
-DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool);
-
-PERF_TEST_P(Sz_KernelSz_Ccorr, ImgProc_Convolve,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(17, 27, 32, 64),
-                    Bool()))
-{
-    declare.time(10.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int templ_size = GET_PARAM(1);
-    const bool ccorr = GET_PARAM(2);
-
-    const cv::Mat image(size, CV_32FC1);
-    const cv::Mat templ(templ_size, templ_size, CV_32FC1);
-    declare.in(image, templ, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_image = cv::gpu::createContinuous(size, CV_32FC1);
-        d_image.upload(image);
-
-        cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
-        d_templ.upload(templ);
-
-        cv::gpu::GpuMat dst;
-        cv::gpu::ConvolveBuf d_buf;
-
-        TEST_CYCLE() cv::gpu::convolve(d_image, d_templ, dst, ccorr, d_buf);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        if (ccorr)
-            FAIL_NO_CPU();
-
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::filter2D(image, dst, image.depth(), templ);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplate8U
 
@@ -846,108 +798,6 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate32F,
 
         TEST_CYCLE() cv::matchTemplate(image, templ, dst, method);
 
-        CPU_SANITY_CHECK(dst);
-    }
-};
-
-//////////////////////////////////////////////////////////////////////
-// MulSpectrums
-
-CV_FLAGS(DftFlags, 0, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REAL_OUTPUT)
-
-DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags);
-
-PERF_TEST_P(Sz_Flags, ImgProc_MulSpectrums,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(0, DftFlags(cv::DFT_ROWS))))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int flag = GET_PARAM(1);
-
-    cv::Mat a(size, CV_32FC2);
-    cv::Mat b(size, CV_32FC2);
-    declare.in(a, b, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_a(a);
-        const cv::gpu::GpuMat d_b(b);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::mulSpectrums(d_a, d_b, dst, flag);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::mulSpectrums(a, b, dst, flag);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MulAndScaleSpectrums
-
-PERF_TEST_P(Sz, ImgProc_MulAndScaleSpectrums,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    const float scale = 1.f / size.area();
-
-    cv::Mat src1(size, CV_32FC2);
-    cv::Mat src2(size, CV_32FC2);
-    declare.in(src1,src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Dft
-
-PERF_TEST_P(Sz_Flags, ImgProc_Dft,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))))
-{
-    declare.time(10.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int flag = GET_PARAM(1);
-
-    cv::Mat src(size, CV_32FC2);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::dft(d_src, dst, size, flag);
-
-        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::dft(src, dst, flag);
-
         CPU_SANITY_CHECK(dst);
     }
 }
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu
index f209ab680..01cfae4cb 100644
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -582,119 +582,6 @@ namespace cv { namespace gpu { namespace cudev
                 cudaSafeCall(cudaDeviceSynchronize());
         }
 
-        //////////////////////////////////////////////////////////////////////////
-        // mulSpectrums
-
-        __global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x < c.cols && y < c.rows)
-            {
-                c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
-            }
-        }
-
-
-        void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
-        {
-            dim3 threads(256);
-            dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
-
-            mulSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, c);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-
-        //////////////////////////////////////////////////////////////////////////
-        // mulSpectrums_CONJ
-
-        __global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x < c.cols && y < c.rows)
-            {
-                c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
-            }
-        }
-
-
-        void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
-        {
-            dim3 threads(256);
-            dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
-
-            mulSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, c);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-
-        //////////////////////////////////////////////////////////////////////////
-        // mulAndScaleSpectrums
-
-        __global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x < c.cols && y < c.rows)
-            {
-                cufftComplex v = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
-                c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
-            }
-        }
-
-
-        void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
-        {
-            dim3 threads(256);
-            dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
-
-            mulAndScaleSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, scale, c);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-
-        //////////////////////////////////////////////////////////////////////////
-        // mulAndScaleSpectrums_CONJ
-
-        __global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x < c.cols && y < c.rows)
-            {
-                cufftComplex v = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
-                c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
-            }
-        }
-
-
-        void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
-        {
-            dim3 threads(256);
-            dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
-
-            mulAndScaleSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, scale, c);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
         //////////////////////////////////////////////////////////////////////////
         // buildWarpMaps
 
diff --git a/modules/gpu/src/cuda/safe_call.hpp b/modules/gpu/src/cuda/safe_call.hpp
index 1d4a437e6..35530bee3 100644
--- a/modules/gpu/src/cuda/safe_call.hpp
+++ b/modules/gpu/src/cuda/safe_call.hpp
@@ -45,21 +45,6 @@
 
 #include <cufft.h>
 
-#if defined(__GNUC__)
-    #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
-#else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__)
-#endif
 
-namespace cv { namespace gpu
-{
-    void cufftError(int err, const char *file, const int line, const char *func = "");
-}}
-
-static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
-{
-    if (CUFFT_SUCCESS != err)
-        cv::gpu::cufftError(err, file, line, func);
-}
 
 #endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
diff --git a/modules/gpu/src/error.cpp b/modules/gpu/src/error.cpp
index 3b8b6b3ee..d0a621d79 100644
--- a/modules/gpu/src/error.cpp
+++ b/modules/gpu/src/error.cpp
@@ -43,65 +43,3 @@
 
 using namespace cv;
 using namespace cv::gpu;
-
-#ifdef HAVE_CUDA
-
-namespace
-{
-    #define error_entry(entry)  { entry, #entry }
-
-    struct ErrorEntry
-    {
-        int code;
-        const char* str;
-    };
-
-    struct ErrorEntryComparer
-    {
-        int code;
-        ErrorEntryComparer(int code_) : code(code_) {}
-        bool operator()(const ErrorEntry& e) const { return e.code == code; }
-    };
-
-    String getErrorString(int code, const ErrorEntry* errors, size_t n)
-    {
-        size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
-
-        const char* msg = (idx != n) ? errors[idx].str : "Unknown error code";
-        String str = cv::format("%s [Code = %d]", msg, code);
-
-        return str;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    // CUFFT errors
-
-    const ErrorEntry cufft_errors[] =
-    {
-        error_entry( CUFFT_INVALID_PLAN ),
-        error_entry( CUFFT_ALLOC_FAILED ),
-        error_entry( CUFFT_INVALID_TYPE ),
-        error_entry( CUFFT_INVALID_VALUE ),
-        error_entry( CUFFT_INTERNAL_ERROR ),
-        error_entry( CUFFT_EXEC_FAILED ),
-        error_entry( CUFFT_SETUP_FAILED ),
-        error_entry( CUFFT_INVALID_SIZE ),
-        error_entry( CUFFT_UNALIGNED_DATA )
-    };
-
-    const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]);
-}
-
-namespace cv
-{
-    namespace gpu
-    {
-        void cufftError(int code, const char* file, const int line, const char* func)
-        {
-            String msg = getErrorString(code, cufft_errors, cufft_error_num);
-            cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
-        }
-    }
-}
-
-#endif
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 3a967fb35..c21a7b837 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -73,12 +73,6 @@ void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, in
 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_no_cuda(); }
 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int) { throw_no_cuda(); }
 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); }
-void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); }
-void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::ConvolveBuf::create(Size, Size) { throw_no_cuda(); }
-void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_no_cuda(); }
-void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream&) { throw_no_cuda(); }
 void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_no_cuda(); }
 void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_no_cuda(); }
 void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_no_cuda(); }
@@ -848,299 +842,6 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM
     cornerMinEigenVal_gpu(blockSize, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
 }
 
-//////////////////////////////////////////////////////////////////////////////
-// mulSpectrums
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
-
-        void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream)
-{
-    (void)flags;
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);
-
-    static Caller callers[] = { cudev::imgproc::mulSpectrums, cudev::imgproc::mulSpectrums_CONJ };
-
-    CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
-    CV_Assert(a.size() == b.size());
-
-    c.create(a.size(), CV_32FC2);
-
-    Caller caller = callers[(int)conjB];
-    caller(a, b, c, StreamAccessor::getStream(stream));
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// mulAndScaleSpectrums
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
-
-        void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream)
-{
-    (void)flags;
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);
-    static Caller callers[] = { cudev::imgproc::mulAndScaleSpectrums, cudev::imgproc::mulAndScaleSpectrums_CONJ };
-
-    CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
-    CV_Assert(a.size() == b.size());
-
-    c.create(a.size(), CV_32FC2);
-
-    Caller caller = callers[(int)conjB];
-    caller(a, b, scale, c, StreamAccessor::getStream(stream));
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// dft
-
-void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stream& stream)
-{
-#ifndef HAVE_CUFFT
-
-    OPENCV_GPU_UNUSED(src);
-    OPENCV_GPU_UNUSED(dst);
-    OPENCV_GPU_UNUSED(dft_size);
-    OPENCV_GPU_UNUSED(flags);
-    OPENCV_GPU_UNUSED(stream);
-
-    throw_no_cuda();
-
-#else
-
-    CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
-
-    // We don't support unpacked output (in the case of real input)
-    CV_Assert(!(flags & DFT_COMPLEX_OUTPUT));
-
-    bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);
-    int is_row_dft = flags & DFT_ROWS;
-    int is_scaled_dft = flags & DFT_SCALE;
-    int is_inverse = flags & DFT_INVERSE;
-    bool is_complex_input = src.channels() == 2;
-    bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
-
-    // We don't support real-to-real transform
-    CV_Assert(is_complex_input || is_complex_output);
-
-    GpuMat src_data;
-
-    // Make sure here we work with the continuous input,
-    // as CUFFT can't handle gaps
-    src_data = src;
-    createContinuous(src.rows, src.cols, src.type(), src_data);
-    if (src_data.data != src.data)
-        src.copyTo(src_data);
-
-    Size dft_size_opt = dft_size;
-    if (is_1d_input && !is_row_dft)
-    {
-        // If the source matrix is single column handle it as single row
-        dft_size_opt.width = std::max(dft_size.width, dft_size.height);
-        dft_size_opt.height = std::min(dft_size.width, dft_size.height);
-    }
-
-    cufftType dft_type = CUFFT_R2C;
-    if (is_complex_input)
-        dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
-
-    CV_Assert(dft_size_opt.width > 1);
-
-    cufftHandle plan;
-    if (is_1d_input || is_row_dft)
-        cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height);
-    else
-        cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type);
-
-    cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) );
-
-    if (is_complex_input)
-    {
-        if (is_complex_output)
-        {
-            createContinuous(dft_size, CV_32FC2, dst);
-            cufftSafeCall(cufftExecC2C(
-                    plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),
-                    is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
-        }
-        else
-        {
-            createContinuous(dft_size, CV_32F, dst);
-            cufftSafeCall(cufftExecC2R(
-                    plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftReal>()));
-        }
-    }
-    else
-    {
-        // We could swap dft_size for efficiency. Here we must reflect it
-        if (dft_size == dft_size_opt)
-            createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);
-        else
-            createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);
-
-        cufftSafeCall(cufftExecR2C(
-                plan, src_data.ptr<cufftReal>(), dst.ptr<cufftComplex>()));
-    }
-
-    cufftSafeCall(cufftDestroy(plan));
-
-    if (is_scaled_dft)
-        multiply(dst, Scalar::all(1. / dft_size.area()), dst, 1, -1, stream);
-
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// convolve
-
-void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
-{
-    result_size = Size(image_size.width - templ_size.width + 1,
-                       image_size.height - templ_size.height + 1);
-
-    block_size = user_block_size;
-    if (user_block_size.width == 0 || user_block_size.height == 0)
-        block_size = estimateBlockSize(result_size, templ_size);
-
-    dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.)));
-    dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.)));
-
-    // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192),
-    // see CUDA Toolkit 4.1 CUFFT Library Programming Guide
-    if (dft_size.width > 8192)
-        dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);
-    if (dft_size.height > 8192)
-        dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);
-
-    // To avoid wasting time doing small DFTs
-    dft_size.width = std::max(dft_size.width, 512);
-    dft_size.height = std::max(dft_size.height, 512);
-
-    createContinuous(dft_size, CV_32F, image_block);
-    createContinuous(dft_size, CV_32F, templ_block);
-    createContinuous(dft_size, CV_32F, result_data);
-
-    spect_len = dft_size.height * (dft_size.width / 2 + 1);
-    createContinuous(1, spect_len, CV_32FC2, image_spect);
-    createContinuous(1, spect_len, CV_32FC2, templ_spect);
-    createContinuous(1, spect_len, CV_32FC2, result_spect);
-
-    // Use maximum result matrix block size for the estimated DFT block size
-    block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
-    block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
-}
-
-
-Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size*/)
-{
-    int width = (result_size.width + 2) / 3;
-    int height = (result_size.height + 2) / 3;
-    width = std::min(width, result_size.width);
-    height = std::min(height, result_size.height);
-    return Size(width, height);
-}
-
-
-void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr)
-{
-    ConvolveBuf buf;
-    convolve(image, templ, result, ccorr, buf);
-}
-
-void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
-{
-    using namespace ::cv::gpu::cudev::imgproc;
-
-#ifndef HAVE_CUFFT
-    throw_no_cuda();
-#else
-    CV_Assert(image.type() == CV_32F);
-    CV_Assert(templ.type() == CV_32F);
-
-    buf.create(image.size(), templ.size());
-    result.create(buf.result_size, CV_32F);
-
-    Size& block_size = buf.block_size;
-    Size& dft_size = buf.dft_size;
-
-    GpuMat& image_block = buf.image_block;
-    GpuMat& templ_block = buf.templ_block;
-    GpuMat& result_data = buf.result_data;
-
-    GpuMat& image_spect = buf.image_spect;
-    GpuMat& templ_spect = buf.templ_spect;
-    GpuMat& result_spect = buf.result_spect;
-
-    cufftHandle planR2C, planC2R;
-    cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));
-    cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
-
-    cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) );
-    cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );
-
-    GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);
-    copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
-                   templ_block.cols - templ_roi.cols, 0, Scalar(), stream);
-
-    cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(),
-                               templ_spect.ptr<cufftComplex>()));
-
-    // Process all blocks of the result matrix
-    for (int y = 0; y < result.rows; y += block_size.height)
-    {
-        for (int x = 0; x < result.cols; x += block_size.width)
-        {
-            Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
-                                std::min(y + dft_size.height, image.rows) - y);
-            GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
-                             image.step);
-            copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
-                           0, image_block.cols - image_roi.cols, 0, Scalar(), stream);
-
-            cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
-                                       image_spect.ptr<cufftComplex>()));
-            mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
-                                 1.f / dft_size.area(), ccorr, stream);
-            cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
-                                       result_data.ptr<cufftReal>()));
-
-            Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
-                                 std::min(y + block_size.height, result.rows) - y);
-            GpuMat result_roi(result_roi_size, result.type(),
-                              (void*)(result.ptr<float>(y) + x), result.step);
-            GpuMat result_block(result_roi_size, result_data.type(),
-                                result_data.ptr(), result_data.step);
-
-            if (stream)
-                stream.enqueueCopy(result_block, result_roi);
-            else
-                result_block.copyTo(result_roi);
-        }
-    }
-
-    cufftSafeCall(cufftDestroy(planR2C));
-    cufftSafeCall(cufftDestroy(planC2R));
-#endif
-}
-
 
 //////////////////////////////////////////////////////////////////////////////
 // Canny
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
index ffc413ee0..6957f5437 100644
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -489,92 +489,6 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, testing::Combine(
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
     WHOLE_SUBMAT));
 
-////////////////////////////////////////////////////////
-// Convolve
-
-namespace
-{
-    void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
-    {
-        // reallocate the output array if needed
-        C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
-        cv::Size dftSize;
-
-        // compute the size of DFT transform
-        dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
-        dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);
-
-        // allocate temporary buffers and initialize them with 0s
-        cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));
-        cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));
-
-        // copy A and B to the top-left corners of tempA and tempB, respectively
-        cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));
-        A.copyTo(roiA);
-        cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));
-        B.copyTo(roiB);
-
-        // now transform the padded A & B in-place;
-        // use "nonzeroRows" hint for faster processing
-        cv::dft(tempA, tempA, 0, A.rows);
-        cv::dft(tempB, tempB, 0, B.rows);
-
-        // multiply the spectrums;
-        // the function handles packed spectrum representations well
-        cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);
-
-        // transform the product back from the frequency domain.
-        // Even though all the result rows will be non-zero,
-        // you need only the first C.rows of them, and thus you
-        // pass nonzeroRows == C.rows
-        cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
-
-        // now copy the result back to C.
-        tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
-    }
-
-    IMPLEMENT_PARAM_CLASS(KSize, int);
-    IMPLEMENT_PARAM_CLASS(Ccorr, bool);
-}
-
-PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int ksize;
-    bool ccorr;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        ksize = GET_PARAM(2);
-        ccorr = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Convolve, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
-    cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::convolve(loadMat(src), loadMat(kernel), dst, ccorr);
-
-    cv::Mat dst_gold;
-    convolveDFT(src, kernel, dst_gold, ccorr);
-
-    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Convolve, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(KSize(3), KSize(7), KSize(11), KSize(17), KSize(19), KSize(23), KSize(45)),
-    testing::Values(Ccorr(false), Ccorr(true))));
-
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplate8U
 
@@ -830,192 +744,6 @@ GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF)
 
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate_CanFindBigTemplate, ALL_DEVICES);
 
-////////////////////////////////////////////////////////////////////////////
-// MulSpectrums
-
-CV_FLAGS(DftFlags, 0, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REAL_OUTPUT)
-
-PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int flag;
-
-    cv::Mat a, b;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        flag = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        a = randomMat(size, CV_32FC2);
-        b = randomMat(size, CV_32FC2);
-    }
-};
-
-GPU_TEST_P(MulSpectrums, Simple)
-{
-    cv::gpu::GpuMat c;
-    cv::gpu::mulSpectrums(loadMat(a), loadMat(b), c, flag, false);
-
-    cv::Mat c_gold;
-    cv::mulSpectrums(a, b, c_gold, flag, false);
-
-    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
-}
-
-GPU_TEST_P(MulSpectrums, Scaled)
-{
-    float scale = 1.f / size.area();
-
-    cv::gpu::GpuMat c;
-    cv::gpu::mulAndScaleSpectrums(loadMat(a), loadMat(b), c, flag, scale, false);
-
-    cv::Mat c_gold;
-    cv::mulSpectrums(a, b, c_gold, flag, false);
-    c_gold.convertTo(c_gold, c_gold.type(), scale);
-
-    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MulSpectrums, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
-
-////////////////////////////////////////////////////////////////////////////
-// Dft
-
-struct Dft : testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-namespace
-{
-    void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
-    {
-        SCOPED_TRACE(hint);
-
-        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
-
-        cv::Mat b_gold;
-        cv::dft(a, b_gold, flags);
-
-        cv::gpu::GpuMat d_b;
-        cv::gpu::GpuMat d_b_data;
-        if (inplace)
-        {
-            d_b_data.create(1, a.size().area(), CV_32FC2);
-            d_b = cv::gpu::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
-        }
-        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
-
-        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
-        ASSERT_EQ(CV_32F, d_b.depth());
-        ASSERT_EQ(2, d_b.channels());
-        EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
-    }
-}
-
-GPU_TEST_P(Dft, C2C)
-{
-    int cols = randomInt(2, 100);
-    int rows = randomInt(2, 100);
-
-    for (int i = 0; i < 2; ++i)
-    {
-        bool inplace = i != 0;
-
-        testC2C("no flags", cols, rows, 0, inplace);
-        testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
-        testC2C("no flags 1 0", cols, rows + 1, 0, inplace);
-        testC2C("no flags 1 1", cols + 1, rows, 0, inplace);
-        testC2C("DFT_INVERSE", cols, rows, cv::DFT_INVERSE, inplace);
-        testC2C("DFT_ROWS", cols, rows, cv::DFT_ROWS, inplace);
-        testC2C("single col", 1, rows, 0, inplace);
-        testC2C("single row", cols, 1, 0, inplace);
-        testC2C("single col inversed", 1, rows, cv::DFT_INVERSE, inplace);
-        testC2C("single row inversed", cols, 1, cv::DFT_INVERSE, inplace);
-        testC2C("single row DFT_ROWS", cols, 1, cv::DFT_ROWS, inplace);
-        testC2C("size 1 2", 1, 2, 0, inplace);
-        testC2C("size 2 1", 2, 1, 0, inplace);
-    }
-}
-
-namespace
-{
-    void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
-    {
-        SCOPED_TRACE(hint);
-
-        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC1, 0.0, 10.0);
-
-        cv::gpu::GpuMat d_b, d_c;
-        cv::gpu::GpuMat d_b_data, d_c_data;
-        if (inplace)
-        {
-            if (a.cols == 1)
-            {
-                d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
-                d_b = cv::gpu::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
-            }
-            else
-            {
-                d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
-                d_b = cv::gpu::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
-            }
-            d_c_data.create(1, a.size().area(), CV_32F);
-            d_c = cv::gpu::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
-        }
-
-        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
-        cv::gpu::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
-
-        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
-        EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());
-        ASSERT_EQ(CV_32F, d_c.depth());
-        ASSERT_EQ(1, d_c.channels());
-
-        cv::Mat c(d_c);
-        EXPECT_MAT_NEAR(a, c, rows * cols * 1e-5);
-    }
-}
-
-GPU_TEST_P(Dft, R2CThenC2R)
-{
-    int cols = randomInt(2, 100);
-    int rows = randomInt(2, 100);
-
-    testR2CThenC2R("sanity", cols, rows, false);
-    testR2CThenC2R("sanity 0 1", cols, rows + 1, false);
-    testR2CThenC2R("sanity 1 0", cols + 1, rows, false);
-    testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, false);
-    testR2CThenC2R("single col", 1, rows, false);
-    testR2CThenC2R("single col 1", 1, rows + 1, false);
-    testR2CThenC2R("single row", cols, 1, false);
-    testR2CThenC2R("single row 1", cols + 1, 1, false);
-
-    testR2CThenC2R("sanity", cols, rows, true);
-    testR2CThenC2R("sanity 0 1", cols, rows + 1, true);
-    testR2CThenC2R("sanity 1 0", cols + 1, rows, true);
-    testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, true);
-    testR2CThenC2R("single row", cols, 1, true);
-    testR2CThenC2R("single row 1", cols + 1, 1, true);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Dft, ALL_DEVICES);
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // CornerHarris
 
diff --git a/modules/gpuarithm/CMakeLists.txt b/modules/gpuarithm/CMakeLists.txt
index 75cab4b31..4be25dd8d 100644
--- a/modules/gpuarithm/CMakeLists.txt
+++ b/modules/gpuarithm/CMakeLists.txt
@@ -11,3 +11,7 @@ ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_gpunvidia opencv_imgproc
 if(HAVE_CUBLAS)
   CUDA_ADD_CUBLAS_TO_TARGET(${the_module})
 endif()
+
+if(HAVE_CUFFT)
+  CUDA_ADD_CUFFT_TO_TARGET(${the_module})
+endif()
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
index 8829e43a7..f65d2ec55 100644
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -295,6 +295,49 @@ CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer,
 //! supports source images of 8UC1 type only
 CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());
 
+//! performs per-element multiplication of two full (not packed) Fourier spectrums
+//! supports 32FC2 matrices only (interleaved format)
+CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream = Stream::Null());
+
+//! performs per-element multiplication of two full (not packed) Fourier spectrums and scales the result
+//! supports 32FC2 matrices only (interleaved format)
+CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null());
+
+//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
+//! Param dft_size is the size of DFT transform.
+//!
+//! If the source matrix is not continuous, then an additional copy will be done,
+//! so to avoid copying ensure the source matrix is continuous. If you want to use
+//! a preallocated output ensure it is continuous too, otherwise it will be reallocated.
+//!
+//! Being implemented via CUFFT, the result of a real-to-complex transform contains only non-redundant values
+//! in CUFFT's format. The result as a full complex matrix cannot be retrieved for this kind of transform.
+//!
+//! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format.
+CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
+
+struct CV_EXPORTS ConvolveBuf
+{
+    Size result_size;
+    Size block_size;
+    Size user_block_size;
+    Size dft_size;
+    int spect_len;
+
+    GpuMat image_spect, templ_spect, result_spect;
+    GpuMat image_block, templ_block, result_data;
+
+    void create(Size image_size, Size templ_size);
+    static Size estimateBlockSize(Size result_size, Size templ_size);
+};
+
+
+//! computes convolution (or cross-correlation) of two images using discrete Fourier transform
+//! supports source images of 32FC1 type only
+//! result matrix will have 32FC1 type
+CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr = false);
+CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null());
+
 }} // namespace cv { namespace gpu {
 
 #endif /* __OPENCV_GPUARITHM_HPP__ */
diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp
index 603d2448e..fd388edcd 100644
--- a/modules/gpuarithm/perf/perf_core.cpp
+++ b/modules/gpuarithm/perf/perf_core.cpp
@@ -2156,6 +2156,108 @@ PERF_TEST_P(Sz_Depth_NormType, Core_Normalize,
     }
 }
 
+//////////////////////////////////////////////////////////////////////
+// MulSpectrums
+
+CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags);
+
+PERF_TEST_P(Sz_Flags, ImgProc_MulSpectrums,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(0, DftFlags(cv::DFT_ROWS))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int flag = GET_PARAM(1);
+    // Two same-sized CV_32FC2 inputs, handed to the perf framework through declare.in().
+    cv::Mat a(size, CV_32FC2);
+    cv::Mat b(size, CV_32FC2);
+    declare.in(a, b, WARMUP_RNG);
+    // GPU branch runs cv::gpu::mulSpectrums under TEST_CYCLE(); the else branch runs cv::mulSpectrums on the host.
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_a(a);
+        const cv::gpu::GpuMat d_b(b);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::mulSpectrums(d_a, d_b, dst, flag);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::mulSpectrums(a, b, dst, flag);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MulAndScaleSpectrums
+
+PERF_TEST_P(Sz, ImgProc_MulAndScaleSpectrums,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+    // Scale factor is the reciprocal of the element count of the input size.
+    const float scale = 1.f / size.area();
+
+    cv::Mat src1(size, CV_32FC2);
+    cv::Mat src2(size, CV_32FC2);
+    declare.in(src1,src2, WARMUP_RNG);
+    // Only a GPU branch exists: the else branch reports FAIL_NO_CPU() instead of running a host version.
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+        // Always called with cv::DFT_ROWS and conjB == false.
+        TEST_CYCLE() cv::gpu::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Dft
+
+PERF_TEST_P(Sz_Flags, ImgProc_Dft,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))))
+{
+    declare.time(10.0);
+
+    const cv::Size size = GET_PARAM(0);
+    const int flag = GET_PARAM(1);
+    // Input is a two-channel float matrix (CV_32FC2) of the parameterized size.
+    cv::Mat src(size, CV_32FC2);
+    declare.in(src, WARMUP_RNG);
+    // GPU branch runs cv::gpu::dft with dft_size == size; the else branch runs cv::dft with the same flag.
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::dft(d_src, dst, size, flag);
+        // GPU result is checked with an explicit 1e-6 tolerance in ERROR_RELATIVE mode.
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::dft(src, dst, flag);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
 #ifdef HAVE_OPENCV_IMGPROC
 
 //////////////////////////////////////////////////////////////////////
@@ -2255,4 +2357,52 @@ PERF_TEST_P(Sz, ImgProc_IntegralSqr,
     }
 }
 
+//////////////////////////////////////////////////////////////////////
+// Convolve
+
+DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool);
+
+PERF_TEST_P(Sz_KernelSz_Ccorr, ImgProc_Convolve,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(17, 27, 32, 64),
+                    Bool()))
+{
+    declare.time(10.0);
+
+    const cv::Size size = GET_PARAM(0);
+    const int templ_size = GET_PARAM(1);
+    const bool ccorr = GET_PARAM(2);
+    // Single-channel float image and a square templ_size x templ_size template.
+    const cv::Mat image(size, CV_32FC1);
+    const cv::Mat templ(templ_size, templ_size, CV_32FC1);
+    declare.in(image, templ, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat d_image = cv::gpu::createContinuous(size, CV_32FC1);
+        d_image.upload(image);
+        // Both device matrices are allocated with createContinuous() before the upload.
+        cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
+        d_templ.upload(templ);
+
+        cv::gpu::GpuMat dst;
+        cv::gpu::ConvolveBuf d_buf;
+        // The same ConvolveBuf object is passed to every call made under TEST_CYCLE().
+        TEST_CYCLE() cv::gpu::convolve(d_image, d_templ, dst, ccorr, d_buf);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        if (ccorr)
+            FAIL_NO_CPU();
+        // NOTE(review): whether control reaches filter2D after FAIL_NO_CPU() depends on that macro - confirm.
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::filter2D(image, dst, image.depth(), templ);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
 #endif
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index baf598d96..59fd2ee1d 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -64,14 +64,15 @@ void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, co
 void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); }
+void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); }
+void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::ConvolveBuf::create(Size, Size) { throw_no_cuda(); }
+void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_no_cuda(); }
+void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-////////////////////////////////////////////////////////////////////////
-// gemm
-
-#ifdef HAVE_CUBLAS
-
 namespace
 {
     #define error_entry(entry)  { entry, #entry }
@@ -89,42 +90,93 @@ namespace
         bool operator()(const ErrorEntry& e) const { return e.code == code; }
     };
 
-    const ErrorEntry cublas_errors[] =
+    String getErrorString(int code, const ErrorEntry* errors, size_t n)
     {
-        error_entry( CUBLAS_STATUS_SUCCESS ),
-        error_entry( CUBLAS_STATUS_NOT_INITIALIZED ),
-        error_entry( CUBLAS_STATUS_ALLOC_FAILED ),
-        error_entry( CUBLAS_STATUS_INVALID_VALUE ),
-        error_entry( CUBLAS_STATUS_ARCH_MISMATCH ),
-        error_entry( CUBLAS_STATUS_MAPPING_ERROR ),
-        error_entry( CUBLAS_STATUS_EXECUTION_FAILED ),
-        error_entry( CUBLAS_STATUS_INTERNAL_ERROR )
-    };
+        size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
 
-    const size_t cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]);
+        const char* msg = (idx != n) ? errors[idx].str : "Unknown error code";
+        String str = cv::format("%s [Code = %d]", msg, code);
 
-    static inline void ___cublasSafeCall(cublasStatus_t err, const char* file, const int line, const char* func)
-    {
-        if (CUBLAS_STATUS_SUCCESS != err)
-        {
-            size_t idx = std::find_if(cublas_errors, cublas_errors + cublas_error_num, ErrorEntryComparer(err)) - cublas_errors;
-
-            const char* msg = (idx != cublas_error_num) ? cublas_errors[idx].str : "Unknown error code";
-            String str = cv::format("%s [Code = %d]", msg, err);
-
-            cv::error(cv::Error::GpuApiCallError, str, func, file, line);
-        }
+        return str;
     }
 }
 
-#if defined(__GNUC__)
-    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
-#else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, "")
-#endif
+#ifdef HAVE_CUBLAS
+    namespace
+    {
+        const ErrorEntry cublas_errors[] =
+        {
+            error_entry( CUBLAS_STATUS_SUCCESS ),
+            error_entry( CUBLAS_STATUS_NOT_INITIALIZED ),
+            error_entry( CUBLAS_STATUS_ALLOC_FAILED ),
+            error_entry( CUBLAS_STATUS_INVALID_VALUE ),
+            error_entry( CUBLAS_STATUS_ARCH_MISMATCH ),
+            error_entry( CUBLAS_STATUS_MAPPING_ERROR ),
+            error_entry( CUBLAS_STATUS_EXECUTION_FAILED ),
+            error_entry( CUBLAS_STATUS_INTERNAL_ERROR )
+        };
+
+        const size_t cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]);
+
+        static inline void ___cublasSafeCall(cublasStatus_t err, const char* file, const int line, const char* func)
+        {
+            if (CUBLAS_STATUS_SUCCESS != err)
+            {
+                String msg = getErrorString(err, cublas_errors, cublas_error_num);
+                cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
+            }
+        }
+    }
+
+    #if defined(__GNUC__)
+        #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
+    #else /* defined(__CUDACC__) || defined(__MSVC__) */
+        #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, "")
+    #endif
+#endif // HAVE_CUBLAS
+
+#ifdef HAVE_CUFFT
+    namespace
+    {
+        // Table mapping cuFFT status codes to their names; used only to build error text.
+        // CUFFT_SUCCESS is not listed: the lookup below runs only for non-success codes.
+
+        const ErrorEntry cufft_errors[] =
+        {
+            error_entry( CUFFT_INVALID_PLAN ),
+            error_entry( CUFFT_ALLOC_FAILED ),
+            error_entry( CUFFT_INVALID_TYPE ),
+            error_entry( CUFFT_INVALID_VALUE ),
+            error_entry( CUFFT_INTERNAL_ERROR ),
+            error_entry( CUFFT_EXEC_FAILED ),
+            error_entry( CUFFT_SETUP_FAILED ),
+            error_entry( CUFFT_INVALID_SIZE ),
+            error_entry( CUFFT_UNALIGNED_DATA )
+        };
+
+        const size_t cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]); // size_t, matching getErrorString's n and cublas_error_num
+
+        static inline void ___cufftSafeCall(int err, const char* file, const int line, const char* func) // calls cv::error(GpuApiCallError) when err != CUFFT_SUCCESS
+        {
+            if (CUFFT_SUCCESS != err)
+            {
+                String msg = getErrorString(err, cufft_errors, cufft_error_num);
+                cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
+            }
+        }
+    }
+
+    #if defined(__GNUC__)
+        #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
+    #else /* defined(__CUDACC__) || defined(__MSVC__) */
+        #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__, "")
+    #endif
 
 #endif
 
+////////////////////////////////////////////////////////////////////////
+// gemm
+
 void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags, Stream& stream)
 {
 #ifndef HAVE_CUBLAS
@@ -836,4 +888,289 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
 #endif
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// mulSpectrums
+
+namespace cv { namespace gpu { namespace cudev
+{
+    void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
+
+    void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
+}}}
+
+void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream)
+{
+    // flags is intentionally unused.
+    (void)flags;
+
+    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);
+
+    CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
+    CV_Assert(a.size() == b.size());
+
+    c.create(a.size(), CV_32FC2);
+
+    // conjB selects the variant that multiplies by the conjugate of b.
+    const Caller impl = conjB ? cudev::mulSpectrums_CONJ : cudev::mulSpectrums;
+    impl(a, b, c, StreamAccessor::getStream(stream));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// mulAndScaleSpectrums
+
+namespace cv { namespace gpu { namespace cudev
+{
+    void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
+
+    void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
+}}}
+
+void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream)
+{
+    // flags is intentionally unused.
+    (void)flags;
+
+    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);
+
+    CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
+    CV_Assert(a.size() == b.size());
+
+    c.create(a.size(), CV_32FC2);
+    // conjB selects the variant that multiplies by the conjugate of b.
+    const Caller impl = conjB ? cudev::mulAndScaleSpectrums_CONJ : cudev::mulAndScaleSpectrums;
+    impl(a, b, scale, c, StreamAccessor::getStream(stream));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// dft
+
+void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stream& stream)
+{
+#ifndef HAVE_CUFFT
+
+    OPENCV_GPU_UNUSED(src);
+    OPENCV_GPU_UNUSED(dst);
+    OPENCV_GPU_UNUSED(dft_size);
+    OPENCV_GPU_UNUSED(flags);
+    OPENCV_GPU_UNUSED(stream);
+
+    throw_no_cuda();
+
+#else
+    // CUFFT-backed DFT of src into dst; flags is tested for DFT_ROWS, DFT_SCALE, DFT_INVERSE and DFT_REAL_OUTPUT.
+    CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
+
+    // We don't support unpacked output (in the case of real input)
+    CV_Assert(!(flags & DFT_COMPLEX_OUTPUT));
+
+    bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);
+    int is_row_dft = flags & DFT_ROWS;
+    int is_scaled_dft = flags & DFT_SCALE;
+    int is_inverse = flags & DFT_INVERSE;
+    bool is_complex_input = src.channels() == 2;
+    bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
+
+    // We don't support real-to-real transform
+    CV_Assert(is_complex_input || is_complex_output);
+
+    GpuMat src_data;
+
+    // Make sure here we work with the continuous input,
+    // as CUFFT can't handle gaps
+    src_data = src;
+    createContinuous(src.rows, src.cols, src.type(), src_data);
+    if (src_data.data != src.data)
+        src.copyTo(src_data);
+
+    Size dft_size_opt = dft_size;
+    if (is_1d_input && !is_row_dft)
+    {
+        // If the source matrix is single column handle it as single row
+        dft_size_opt.width = std::max(dft_size.width, dft_size.height);
+        dft_size_opt.height = std::min(dft_size.width, dft_size.height);
+    }
+
+    cufftType dft_type = CUFFT_R2C;
+    if (is_complex_input)
+        dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
+
+    CV_Assert(dft_size_opt.width > 1);
+
+    cufftHandle plan;
+    if (is_1d_input || is_row_dft)
+        cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) );
+    else
+        cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) );
+    // Plan creation is checked so a failure is reported here rather than by a later call on an unusable handle.
+    cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) );
+
+    if (is_complex_input)
+    {
+        if (is_complex_output)
+        {
+            createContinuous(dft_size, CV_32FC2, dst);
+            cufftSafeCall(cufftExecC2C(
+                    plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),
+                    is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
+        }
+        else
+        {
+            createContinuous(dft_size, CV_32F, dst);
+            cufftSafeCall(cufftExecC2R(
+                    plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftReal>()));
+        }
+    }
+    else
+    {
+        // We could swap dft_size for efficiency. Here we must reflect it
+        if (dft_size == dft_size_opt)
+            createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);
+        else
+            createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);
+
+        cufftSafeCall(cufftExecR2C(
+                plan, src_data.ptr<cufftReal>(), dst.ptr<cufftComplex>()));
+    }
+
+    cufftSafeCall(cufftDestroy(plan));
+
+    if (is_scaled_dft)
+        multiply(dst, Scalar::all(1. / dft_size.area()), dst, 1, -1, stream);
+
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// convolve
+
+void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
+{
+    result_size = Size(image_size.width - templ_size.width + 1,
+                       image_size.height - templ_size.height + 1);
+    // Start from user_block_size; if either of its dimensions is 0, estimate a block size instead.
+    block_size = user_block_size;
+    if (user_block_size.width == 0 || user_block_size.height == 0)
+        block_size = estimateBlockSize(result_size, templ_size);
+    // dft_size = 2^ceil(log2(block + template - 1)) in each dimension.
+    dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.)));
+    dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.)));
+
+    // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192),
+    // see CUDA Toolkit 4.1 CUFFT Library Programming Guide
+    if (dft_size.width > 8192)
+        dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);
+    if (dft_size.height > 8192)
+        dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);
+
+    // To avoid wasting time doing small DFTs
+    dft_size.width = std::max(dft_size.width, 512);
+    dft_size.height = std::max(dft_size.height, 512);
+    // Real-valued (CV_32F) working buffers, each of dft_size.
+    createContinuous(dft_size, CV_32F, image_block);
+    createContinuous(dft_size, CV_32F, templ_block);
+    createContinuous(dft_size, CV_32F, result_data);
+    // Spectrum buffers are single-row CV_32FC2 matrices holding spect_len elements.
+    spect_len = dft_size.height * (dft_size.width / 2 + 1);
+    createContinuous(1, spect_len, CV_32FC2, image_spect);
+    createContinuous(1, spect_len, CV_32FC2, templ_spect);
+    createContinuous(1, spect_len, CV_32FC2, result_spect);
+
+    // Use maximum result matrix block size for the estimated DFT block size
+    block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
+    block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
+}
+
+
+Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size*/)
+{
+    // (dimension + 2) / 3 per axis, capped at the result dimension itself.
+    const int third_w = (result_size.width + 2) / 3;
+    const int third_h = (result_size.height + 2) / 3;
+    return Size(std::min(third_w, result_size.width),
+                std::min(third_h, result_size.height));
+}
+
+
+void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr)
+{
+    ConvolveBuf scratch;  // buffer object local to this call
+    convolve(image, templ, result, ccorr, scratch);
+}
+
+void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
+{
+    using namespace ::cv::gpu::cudev::imgproc;
+    // CUFFT-based processing of image with templ, block by block; result is created with buf.result_size.
+#ifndef HAVE_CUFFT
+    throw_no_cuda();
+#else
+    CV_Assert(image.type() == CV_32F);
+    CV_Assert(templ.type() == CV_32F);
+
+    buf.create(image.size(), templ.size());
+    result.create(buf.result_size, CV_32F);
+    // Shorthand references into the caller's buffer object.
+    Size& block_size = buf.block_size;
+    Size& dft_size = buf.dft_size;
+
+    GpuMat& image_block = buf.image_block;
+    GpuMat& templ_block = buf.templ_block;
+    GpuMat& result_data = buf.result_data;
+
+    GpuMat& image_spect = buf.image_spect;
+    GpuMat& templ_spect = buf.templ_spect;
+    GpuMat& result_spect = buf.result_spect;
+    // One plan per transform direction, both bound to the caller's stream.
+    cufftHandle planR2C, planC2R;
+    cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));
+    cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
+
+    cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) );
+    cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );
+    // Extend the template to templ_block's size with copyMakeBorder and transform it once, before the block loop.
+    GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);
+    copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
+                   templ_block.cols - templ_roi.cols, 0, Scalar(), stream);
+
+    cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(),
+                               templ_spect.ptr<cufftComplex>()));
+
+    // Process all blocks of the result matrix
+    for (int y = 0; y < result.rows; y += block_size.height)
+    {
+        for (int x = 0; x < result.cols; x += block_size.width)
+        {
+            Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
+                                std::min(y + dft_size.height, image.rows) - y);
+            GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
+                             image.step);
+            copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
+                           0, image_block.cols - image_roi.cols, 0, Scalar(), stream);
+            // R2C of the block, spectrum product scaled by 1 / dft_size.area() (ccorr is passed as conjB), then C2R.
+            cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
+                                       image_spect.ptr<cufftComplex>()));
+            mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
+                                 1.f / dft_size.area(), ccorr, stream);
+            cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
+                                       result_data.ptr<cufftReal>()));
+            // Only the top-left result_roi_size part of result_data is copied into result at (x, y).
+            Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
+                                 std::min(y + block_size.height, result.rows) - y);
+            GpuMat result_roi(result_roi_size, result.type(),
+                              (void*)(result.ptr<float>(y) + x), result.step);
+            GpuMat result_block(result_roi_size, result_data.type(),
+                                result_data.ptr(), result_data.step);
+
+            if (stream)
+                stream.enqueueCopy(result_block, result_roi);
+            else
+                result_block.copyTo(result_roi);
+        }
+    }
+
+    cufftSafeCall(cufftDestroy(planR2C));
+    cufftSafeCall(cufftDestroy(planC2R));
+#endif
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuarithm/src/cuda/mul_spectrums.cu b/modules/gpuarithm/src/cuda/mul_spectrums.cu
new file mode 100644
index 000000000..1b58b8ca7
--- /dev/null
+++ b/modules/gpuarithm/src/cuda/mul_spectrums.cu
@@ -0,0 +1,171 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "cvconfig.h"
+
+#ifdef HAVE_CUFFT
+
+#include <cufft.h>
+
+#include "opencv2/core/cuda/common.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    //////////////////////////////////////////////////////////////////////////
+    // mulSpectrums
+
+    __global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
+    {
+        // Each thread handles the element at (row, col) of c.
+        const int col = blockIdx.x * blockDim.x + threadIdx.x;
+        const int row = blockIdx.y * blockDim.y + threadIdx.y;
+        // Threads that fall outside c do no work.
+        if (col >= c.cols || row >= c.rows)
+            return;
+        c.ptr(row)[col] = cuCmulf(a.ptr(row)[col], b.ptr(row)[col]);
+    }
+
+
+    void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
+    {
+        // 256 threads per block along x; the grid is sized from c.cols and c.rows.
+        const dim3 block(256);
+        const dim3 grid(divUp(c.cols, block.x), divUp(c.rows, block.y));
+        mulSpectrumsKernel<<<grid, block, 0, stream>>>(a, b, c);
+        cudaSafeCall( cudaGetLastError() );
+        // The device is synchronized only when the null stream was passed.
+        if (!stream)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // mulSpectrums_CONJ
+
+    __global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
+    {
+        // Each thread handles the element at (row, col) of c.
+        const int col = blockIdx.x * blockDim.x + threadIdx.x;
+        const int row = blockIdx.y * blockDim.y + threadIdx.y;
+        // Threads that fall outside c do no work.
+        if (col >= c.cols || row >= c.rows)
+            return;
+        c.ptr(row)[col] = cuCmulf(a.ptr(row)[col], cuConjf(b.ptr(row)[col]));
+    }
+
+
+    void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
+    {
+        // 256 threads per block along x; the grid is sized from c.cols and c.rows.
+        const dim3 block(256);
+        const dim3 grid(divUp(c.cols, block.x), divUp(c.rows, block.y));
+        mulSpectrumsKernel_CONJ<<<grid, block, 0, stream>>>(a, b, c);
+        cudaSafeCall( cudaGetLastError() );
+        // The device is synchronized only when the null stream was passed.
+        if (!stream)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // mulAndScaleSpectrums
+
+    __global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
+    {
+        // Each thread handles the element at (row, col) of c.
+        const int col = blockIdx.x * blockDim.x + threadIdx.x;
+        const int row = blockIdx.y * blockDim.y + threadIdx.y;
+        // Threads that fall outside c do no work.
+        if (col >= c.cols || row >= c.rows)
+            return;
+        const cufftComplex prod = cuCmulf(a.ptr(row)[col], b.ptr(row)[col]);
+        c.ptr(row)[col] = make_cuFloatComplex(cuCrealf(prod) * scale, cuCimagf(prod) * scale);
+    }
+
+
+    void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
+    {
+        // Launches mulAndScaleSpectrumsKernel (c = a * b * scale per element) on stream.
+        dim3 threads(256);
+        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
+        mulAndScaleSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, scale, c);
+        cudaSafeCall( cudaGetLastError() );
+        // Synchronize only for the null stream, as the other launchers in this file do; a caller-supplied stream stays asynchronous.
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // mulAndScaleSpectrums_CONJ
+
+    __global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
+    {
+        // Each thread handles the element at (row, col) of c.
+        const int col = blockIdx.x * blockDim.x + threadIdx.x;
+        const int row = blockIdx.y * blockDim.y + threadIdx.y;
+        // Threads that fall outside c do no work.
+        if (col >= c.cols || row >= c.rows)
+            return;
+        const cufftComplex prod = cuCmulf(a.ptr(row)[col], cuConjf(b.ptr(row)[col]));
+        c.ptr(row)[col] = make_cuFloatComplex(cuCrealf(prod) * scale, cuCimagf(prod) * scale);
+    }
+
+
+    void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
+    {
+        // 256 threads per block along x; the grid is sized from c.cols and c.rows.
+        const dim3 block(256);
+        const dim3 grid(divUp(c.cols, block.x), divUp(c.rows, block.y));
+        mulAndScaleSpectrumsKernel_CONJ<<<grid, block, 0, stream>>>(a, b, scale, c);
+        cudaSafeCall( cudaGetLastError() );
+        // The device is synchronized only when the null stream was passed.
+        if (!stream)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+}}} // namespace cv { namespace gpu { namespace cudev
+
+#endif // HAVE_CUFFT
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuarithm/src/precomp.hpp b/modules/gpuarithm/src/precomp.hpp
index 6e21684aa..f8e38e8cd 100644
--- a/modules/gpuarithm/src/precomp.hpp
+++ b/modules/gpuarithm/src/precomp.hpp
@@ -59,7 +59,11 @@
 #endif
 
 #ifdef HAVE_CUBLAS
-    #include <cublas.h>
+#  include <cublas.h>
+#endif
+
+#ifdef HAVE_CUFFT
+#  include <cufft.h>
 #endif
 
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuarithm/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp
index 36c155480..dd8f854d8 100644
--- a/modules/gpuarithm/test/test_core.cpp
+++ b/modules/gpuarithm/test/test_core.cpp
@@ -3607,6 +3607,278 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Normalize, testing::Combine(
     testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF), NormCode(cv::NORM_MINMAX)),
     WHOLE_SUBMAT));
 
+////////////////////////////////////////////////////////////////////////////
+// MulSpectrums
+
+CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
+{
+    cv::gpu::DeviceInfo devInfo;  // test parameter 0
+    cv::Size size;                // test parameter 1: size of both inputs
+    int flag;                     // test parameter 2: passed as the flags argument
+
+    cv::Mat a, b;                 // random CV_32FC2 inputs created in SetUp
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        flag = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+        // Both operands are two-channel float matrices of the same size.
+        a = randomMat(size, CV_32FC2);
+        b = randomMat(size, CV_32FC2);
+    }
+};
+
+GPU_TEST_P(MulSpectrums, Simple)
+{
+    // CPU reference first, then the GPU product; both are called with conjB == false.
+    cv::Mat c_gold;
+    cv::mulSpectrums(a, b, c_gold, flag, false);
+
+    cv::gpu::GpuMat c;
+    cv::gpu::mulSpectrums(loadMat(a), loadMat(b), c, flag, false);
+    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
+}
+
+GPU_TEST_P(MulSpectrums, Scaled)
+{
+    float scale = 1.f / size.area();
+    // Reference: CPU product, scaled afterwards with convertTo.
+    cv::Mat c_gold;
+    cv::mulSpectrums(a, b, c_gold, flag, false);
+    c_gold.convertTo(c_gold, c_gold.type(), scale);
+
+    // The GPU call receives the same scale as an argument.
+    cv::gpu::GpuMat c;
+    cv::gpu::mulAndScaleSpectrums(loadMat(a), loadMat(b), c, flag, scale, false);
+    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MulSpectrums, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
+
+////////////////////////////////////////////////////////////////////////////
+// Dft
+
+struct Dft : testing::TestWithParam<cv::gpu::DeviceInfo>
+{
+    cv::gpu::DeviceInfo devInfo;  // the test parameter
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+        // Select the device given by the test parameter.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+namespace
+{
+    void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
+    {
+        SCOPED_TRACE(hint);
+        // Compares cv::gpu::dft with cv::dft on a random cols x rows CV_32FC2 matrix.
+        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
+
+        cv::Mat b_gold;
+        cv::dft(a, b_gold, flags);
+        // With inplace set, d_b is a header over the preallocated d_b_data buffer.
+        cv::gpu::GpuMat d_b;
+        cv::gpu::GpuMat d_b_data;
+        if (inplace)
+        {
+            d_b_data.create(1, a.size().area(), CV_32FC2);
+            d_b = cv::gpu::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
+        }
+        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
+        // In the inplace case the output must still point at d_b_data.
+        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
+        ASSERT_EQ(CV_32F, d_b.depth());
+        ASSERT_EQ(2, d_b.channels());
+        EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
+    }
+}
+
+GPU_TEST_P(Dft, C2C)
+{
+    int cols = randomInt(2, 100);
+    int rows = randomInt(2, 100);
+
+    for (int i = 0; i < 2; ++i)
+    {
+        bool inplace = i != 0;
+        // In the "no flags c r" hints, c / r say whether cols / rows is increased by one.
+        testC2C("no flags", cols, rows, 0, inplace);
+        testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
+        testC2C("no flags 1 0", cols + 1, rows, 0, inplace);
+        testC2C("no flags 1 1", cols + 1, rows + 1, 0, inplace);
+        testC2C("DFT_INVERSE", cols, rows, cv::DFT_INVERSE, inplace);
+        testC2C("DFT_ROWS", cols, rows, cv::DFT_ROWS, inplace);
+        testC2C("single col", 1, rows, 0, inplace);
+        testC2C("single row", cols, 1, 0, inplace);
+        testC2C("single col inversed", 1, rows, cv::DFT_INVERSE, inplace);
+        testC2C("single row inversed", cols, 1, cv::DFT_INVERSE, inplace);
+        testC2C("single row DFT_ROWS", cols, 1, cv::DFT_ROWS, inplace);
+        testC2C("size 1 2", 1, 2, 0, inplace);
+        testC2C("size 2 1", 2, 1, 0, inplace);
+    }
+}
+
+namespace
+{
+    void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
+    {
+        SCOPED_TRACE(hint);
+        // Round trip: real -> complex -> real (with DFT_SCALE) must reproduce the random input a.
+        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC1, 0.0, 10.0);
+        // With inplace set, d_b and d_c are headers over preallocated buffers.
+        cv::gpu::GpuMat d_b, d_c;
+        cv::gpu::GpuMat d_b_data, d_c_data;
+        if (inplace)
+        {
+            if (a.cols == 1)
+            {
+                d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
+                d_b = cv::gpu::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
+            }
+            else
+            {
+                d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
+                d_b = cv::gpu::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
+            }
+            d_c_data.create(1, a.size().area(), CV_32F);
+            d_c = cv::gpu::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
+        }
+
+        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
+        cv::gpu::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
+        // In the inplace case both outputs must still point at the preallocated buffers.
+        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
+        EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());
+        ASSERT_EQ(CV_32F, d_c.depth());
+        ASSERT_EQ(1, d_c.channels());
+
+        cv::Mat c(d_c);
+        EXPECT_MAT_NEAR(a, c, rows * cols * 1e-5);
+    }
+}
+
+GPU_TEST_P(Dft, R2CThenC2R)
+{
+    int cols = randomInt(2, 100);
+    int rows = randomInt(2, 100);
+
+    for (int pass = 0; pass < 2; ++pass)  // pass 0: inplace == false, pass 1: inplace == true
+    {
+        const bool inplace = pass != 0;
+        testR2CThenC2R("sanity", cols, rows, inplace);
+        testR2CThenC2R("sanity 0 1", cols, rows + 1, inplace);
+        testR2CThenC2R("sanity 1 0", cols + 1, rows, inplace);
+        testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, inplace);
+        if (!inplace)  // the single-column cases run only with inplace == false
+        {
+            testR2CThenC2R("single col", 1, rows, inplace);
+            testR2CThenC2R("single col 1", 1, rows + 1, inplace);
+        }
+        testR2CThenC2R("single row", cols, 1, inplace);
+        testR2CThenC2R("single row 1", cols + 1, 1, inplace);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Dft, ALL_DEVICES);
+
+////////////////////////////////////////////////////////
+// Convolve
+
+namespace
+{
+    // CPU reference used by the Convolve test: DFT-based product of A and B written to C.
+    // ccorr is forwarded to cv::mulSpectrums as its last (conjB) argument.
+    void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
+    {
+        // (re)allocate the output array if needed
+        C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
+
+        // transform size: the optimal DFT size for A + B - 1 in each dimension
+        const int dftCols = cv::getOptimalDFTSize(A.cols + B.cols - 1);
+        const int dftRows = cv::getOptimalDFTSize(A.rows + B.rows - 1);
+        const cv::Size dftSize(dftCols, dftRows);
+
+        // zero-initialized work buffers of the transform size
+        cv::Mat paddedA(dftSize, A.type(), cv::Scalar::all(0));
+        cv::Mat paddedB(dftSize, B.type(), cv::Scalar::all(0));
+
+        // A and B go into the top-left corners of their buffers
+        cv::Mat cornerA(paddedA, cv::Rect(0, 0, A.cols, A.rows));
+        cv::Mat cornerB(paddedB, cv::Rect(0, 0, B.cols, B.rows));
+        A.copyTo(cornerA);
+        B.copyTo(cornerB);
+
+        // forward transforms, done in place;
+        // the row counts are passed as the "nonzeroRows" hint
+        cv::dft(paddedA, paddedA, 0, A.rows);
+        cv::dft(paddedB, paddedB, 0, B.rows);
+
+        // multiply the spectrums, storing the product in paddedA;
+        // the function handles packed spectrum representations well
+        cv::mulSpectrums(paddedA, paddedB, paddedA, 0, ccorr);
+
+        // inverse transform with scaling; only the first C.rows rows of the
+        // output are needed, hence nonzeroRows == C.rows
+        cv::dft(paddedA, paddedA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
+
+        // copy the top-left C.cols x C.rows window into C
+        paddedA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
+    }
+
+    IMPLEMENT_PARAM_CLASS(KSize, int)
+    IMPLEMENT_PARAM_CLASS(Ccorr, bool)
+}
+
+PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
+{
+    cv::gpu::DeviceInfo devInfo;  // test parameter 0
+    cv::Size size;                // test parameter 1: size of the source matrix
+    int ksize;                    // test parameter 2: side of the square kernel
+    bool ccorr;                   // test parameter 3: passed as the ccorr argument
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        ksize = GET_PARAM(2);
+        ccorr = GET_PARAM(3);
+        // Select the device given by test parameter 0.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Convolve, Accuracy)
+{
+    cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
+    cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
+    // Reference result from the CPU helper convolveDFT.
+    cv::Mat dst_gold;
+    convolveDFT(src, kernel, dst_gold, ccorr);
+
+    // GPU result for the same inputs, compared with eps 1e-1.
+    cv::gpu::GpuMat dst;
+    cv::gpu::convolve(loadMat(src), loadMat(kernel), dst, ccorr);
+    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Convolve, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(KSize(3), KSize(7), KSize(11), KSize(17), KSize(19), KSize(23), KSize(45)),
+    testing::Values(Ccorr(false), Ccorr(true))));
+
 #ifdef HAVE_OPENCV_IMGPROC
 
 //////////////////////////////////////////////////////////////////////////////

From e41aea0acf0d6d40b03a2f38499f135504101752 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 18:14:35 +0400
Subject: [PATCH 14/49] gpuimgproc module for image processing

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/doc/gpu.rst                       |   1 -
 modules/gpu/include/opencv2/gpu.hpp           | 377 +--------------
 ...ter.cpp => disparity_bilateral_filter.cpp} |   0
 modules/gpu/test/test_precomp.hpp             |   2 -
 .../test/test_threshold.cpp                   |   0
 modules/gpuimgproc/CMakeLists.txt             |   9 +
 modules/gpuimgproc/doc/gpuimgproc.rst         |   8 +
 .../doc/image_processing.rst                  |   0
 .../gpuimgproc/include/opencv2/gpuimgproc.hpp | 441 ++++++++++++++++++
 .../perf/perf_denoising.cpp                   |   0
 .../{gpu => gpuimgproc}/perf/perf_imgproc.cpp |  46 +-
 .../perf/perf_labeling.cpp                    |   0
 modules/gpuimgproc/perf/perf_main.cpp         |  47 ++
 modules/gpuimgproc/perf/perf_precomp.cpp      |  43 ++
 modules/gpuimgproc/perf/perf_precomp.hpp      |  66 +++
 modules/{gpu => gpuimgproc}/src/blend.cpp     |   0
 modules/{gpu => gpuimgproc}/src/color.cpp     |   0
 .../src/cuda/bilateral_filter.cu              |   0
 modules/{gpu => gpuimgproc}/src/cuda/blend.cu |   0
 modules/{gpu => gpuimgproc}/src/cuda/canny.cu |   0
 .../src/cuda/ccomponetns.cu                   |   0
 modules/{gpu => gpuimgproc}/src/cuda/clahe.cu |   0
 modules/{gpu => gpuimgproc}/src/cuda/color.cu |   0
 .../{gpu => gpuimgproc}/src/cuda/debayer.cu   |   0
 modules/{gpu => gpuimgproc}/src/cuda/gftt.cu  |   0
 modules/{gpu => gpuimgproc}/src/cuda/hist.cu  |   0
 modules/{gpu => gpuimgproc}/src/cuda/hough.cu |   0
 .../{gpu => gpuimgproc}/src/cuda/imgproc.cu   |   1 -
 .../src/cuda/match_template.cu                |   0
 modules/{gpu => gpuimgproc}/src/cuda/nlm.cu   |   0
 .../{gpu => gpuimgproc}/src/cuda/pyr_down.cu  |   0
 .../{gpu => gpuimgproc}/src/cuda/pyr_up.cu    |   0
 modules/{gpu => gpuimgproc}/src/cuda/remap.cu |   0
 .../{gpu => gpuimgproc}/src/cuda/resize.cu    |   0
 modules/{gpu => gpuimgproc}/src/cuda/warp.cu  |   0
 .../src/cvt_color_internal.h                  |   0
 modules/{gpu => gpuimgproc}/src/denoising.cpp |   0
 modules/{gpu => gpuimgproc}/src/gftt.cpp      |   0
 modules/{gpu => gpuimgproc}/src/graphcuts.cpp |   0
 modules/{gpu => gpuimgproc}/src/hough.cpp     |   0
 modules/{gpu => gpuimgproc}/src/imgproc.cpp   |   0
 .../src/match_template.cpp                    |   0
 .../src/mssegmentation.cpp                    |   0
 modules/gpuimgproc/src/precomp.cpp            |  43 ++
 modules/gpuimgproc/src/precomp.hpp            |  53 +++
 modules/{gpu => gpuimgproc}/src/pyramids.cpp  |   0
 modules/{gpu => gpuimgproc}/src/remap.cpp     |   0
 modules/{gpu => gpuimgproc}/src/resize.cpp    |   0
 modules/{gpu => gpuimgproc}/src/warp.cpp      |   0
 .../test/interpolation.hpp                    |   0
 .../{gpu => gpuimgproc}/test/test_color.cpp   |   0
 .../test/test_denoising.cpp                   |   0
 .../{gpu => gpuimgproc}/test/test_hough.cpp   |   0
 .../{gpu => gpuimgproc}/test/test_imgproc.cpp |   0
 .../test/test_labeling.cpp                    |   0
 modules/gpuimgproc/test/test_main.cpp         |  45 ++
 modules/gpuimgproc/test/test_precomp.cpp      |  43 ++
 modules/gpuimgproc/test/test_precomp.hpp      |  63 +++
 .../test/test_pyramids.cpp                    |   0
 .../{gpu => gpuimgproc}/test/test_remap.cpp   |   0
 .../{gpu => gpuimgproc}/test/test_resize.cpp  |   0
 .../test/test_warp_affine.cpp                 |   0
 .../test/test_warp_perspective.cpp            |   0
 samples/cpp/CMakeLists.txt                    |   1 +
 samples/gpu/CMakeLists.txt                    |   2 +-
 66 files changed, 889 insertions(+), 404 deletions(-)
 rename modules/gpu/src/{bilateral_filter.cpp => disparity_bilateral_filter.cpp} (100%)
 rename modules/{gpu => gpuarithm}/test/test_threshold.cpp (100%)
 create mode 100644 modules/gpuimgproc/CMakeLists.txt
 create mode 100644 modules/gpuimgproc/doc/gpuimgproc.rst
 rename modules/{gpu => gpuimgproc}/doc/image_processing.rst (100%)
 create mode 100644 modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
 rename modules/{gpu => gpuimgproc}/perf/perf_denoising.cpp (100%)
 rename modules/{gpu => gpuimgproc}/perf/perf_imgproc.cpp (97%)
 rename modules/{gpu => gpuimgproc}/perf/perf_labeling.cpp (100%)
 create mode 100644 modules/gpuimgproc/perf/perf_main.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_precomp.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_precomp.hpp
 rename modules/{gpu => gpuimgproc}/src/blend.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/color.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/bilateral_filter.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/blend.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/canny.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/ccomponetns.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/clahe.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/color.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/debayer.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/gftt.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/hist.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/hough.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/imgproc.cu (99%)
 rename modules/{gpu => gpuimgproc}/src/cuda/match_template.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/nlm.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/pyr_down.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/pyr_up.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/remap.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/resize.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cuda/warp.cu (100%)
 rename modules/{gpu => gpuimgproc}/src/cvt_color_internal.h (100%)
 rename modules/{gpu => gpuimgproc}/src/denoising.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/gftt.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/graphcuts.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/hough.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/imgproc.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/match_template.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/mssegmentation.cpp (100%)
 create mode 100644 modules/gpuimgproc/src/precomp.cpp
 create mode 100644 modules/gpuimgproc/src/precomp.hpp
 rename modules/{gpu => gpuimgproc}/src/pyramids.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/remap.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/resize.cpp (100%)
 rename modules/{gpu => gpuimgproc}/src/warp.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/interpolation.hpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_color.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_denoising.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_hough.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_imgproc.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_labeling.cpp (100%)
 create mode 100644 modules/gpuimgproc/test/test_main.cpp
 create mode 100644 modules/gpuimgproc/test/test_precomp.cpp
 create mode 100644 modules/gpuimgproc/test/test_precomp.hpp
 rename modules/{gpu => gpuimgproc}/test/test_pyramids.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_remap.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_resize.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_warp_affine.cpp (100%)
 rename modules/{gpu => gpuimgproc}/test/test_warp_perspective.cpp (100%)

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 55fc1007e..ee66608a2 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -4,7 +4,7 @@ endif()
 
 set(the_description "GPU-accelerated Computer Vision")
 
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters OPTIONAL opencv_gpunvidia)
+ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc OPTIONAL opencv_gpunvidia)
 
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
 
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index de52ceaba..6c082ccd1 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -8,7 +8,6 @@ gpu. GPU-accelerated Computer Vision
     introduction
     initalization_and_information
     data_structures
-    image_processing
     object_detection
     feature_detection_and_description
     camera_calibration_and_3d_reconstruction
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 19fd7c93e..739732123 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -52,6 +52,8 @@
 #include "opencv2/core/gpumat.hpp"
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpufilters.hpp"
+#include "opencv2/gpuimgproc.hpp"
+
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect.hpp"
 #include "opencv2/features2d.hpp"
@@ -60,280 +62,7 @@ namespace cv { namespace gpu {
 ////////////////////////////// Image processing //////////////////////////////
 
 
-enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
-       ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
 
-//! Composite two images using alpha opacity values contained in each image
-//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
-CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
-
-//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
-//! supports only CV_32FC1 map type
-CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
-                      int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(),
-                      Stream& stream = Stream::Null());
-
-//! Does mean shift filtering on GPU.
-CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
-                                   TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
-                                   Stream& stream = Stream::Null());
-
-//! Does mean shift procedure on GPU.
-CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
-                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
-                              Stream& stream = Stream::Null());
-
-//! Does mean shift segmentation with elimination of small regions.
-CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
-                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
-
-//! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV.
-//! Supported types of input disparity: CV_8U, CV_16S.
-//! Output disparity has CV_8UC4 type in BGRA format (alpha = 255).
-CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null());
-
-//! Reprojects disparity image to 3D space.
-//! Supports CV_8U and CV_16S types of input disparity.
-//! The output is a 3- or 4-channel floating-point matrix.
-//! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map.
-//! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify.
-CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null());
-
-//! converts image from one color space to another
-CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
-
-enum
-{
-    // Bayer Demosaicing (Malvar, He, and Cutler)
-    COLOR_BayerBG2BGR_MHT = 256,
-    COLOR_BayerGB2BGR_MHT = 257,
-    COLOR_BayerRG2BGR_MHT = 258,
-    COLOR_BayerGR2BGR_MHT = 259,
-
-    COLOR_BayerBG2RGB_MHT = COLOR_BayerRG2BGR_MHT,
-    COLOR_BayerGB2RGB_MHT = COLOR_BayerGR2BGR_MHT,
-    COLOR_BayerRG2RGB_MHT = COLOR_BayerBG2BGR_MHT,
-    COLOR_BayerGR2RGB_MHT = COLOR_BayerGB2BGR_MHT,
-
-    COLOR_BayerBG2GRAY_MHT = 260,
-    COLOR_BayerGB2GRAY_MHT = 261,
-    COLOR_BayerRG2GRAY_MHT = 262,
-    COLOR_BayerGR2GRAY_MHT = 263
-};
-CV_EXPORTS void demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn = -1, Stream& stream = Stream::Null());
-
-//! swap channels
-//! dstOrder - Integer array describing how channel values are permutated. The n-th entry
-//!            of the array contains the number of the channel that is stored in the n-th channel of
-//!            the output image. E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR
-//!            channel order.
-CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null());
-
-//! Routines for correcting image color gamma
-CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null());
-
-//! resizes the image
-//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA
-CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
-
-//! warps the image using affine transformation
-//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
-    int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
-
-CV_EXPORTS void buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
-
-//! warps the image using perspective transformation
-//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
-    int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
-
-CV_EXPORTS void buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
-
-//! builds plane warping maps
-CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, float scale,
-                                   GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
-
-//! builds cylindrical warping maps
-CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                         GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
-
-//! builds spherical warping maps
-CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                       GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
-
-//! rotates an image around the origin (0,0) and then shifts it
-//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth
-CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
-                       int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
-
-//! computes Harris cornerness criteria at each image pixel
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
-                             int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
-
-//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
-    int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
-
-struct CV_EXPORTS MatchTemplateBuf
-{
-    Size user_block_size;
-    GpuMat imagef, templf;
-    std::vector<GpuMat> images;
-    std::vector<GpuMat> image_sums;
-    std::vector<GpuMat> image_sqsums;
-};
-
-//! computes the proximity map for the raster template and the image where the template is searched for
-CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null());
-
-//! computes the proximity map for the raster template and the image where the template is searched for
-CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null());
-
-//! smoothes the source image and downsamples it
-CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! upsamples the source image and then smoothes it
-CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! performs linear blending of two images
-//! to avoid accuracy errors sum of weigths shouldn't be very close to zero
-CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
-                            GpuMat& result, Stream& stream = Stream::Null());
-
-//! Performa bilateral filtering of passsed image
-CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial,
-                                int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null());
-
-//! Brute force non-local means algorith (slow but universal)
-CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
-
-//! Fast (but approximate)version of non-local means algorith similar to CPU function (running sums technique)
-class CV_EXPORTS FastNonLocalMeansDenoising
-{
-public:
-    //! Simple method, recommended for grayscale images (though it supports multichannel images)
-    void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
-
-    //! Processes luminance and color components separatelly
-    void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
-
-private:
-
-    GpuMat buffer, extended_src_buffer;
-    GpuMat lab, l, ab;
-};
-
-struct CV_EXPORTS CannyBuf
-{
-    void create(const Size& image_size, int apperture_size = 3);
-    void release();
-
-    GpuMat dx, dy;
-    GpuMat mag;
-    GpuMat map;
-    GpuMat st1, st2;
-    Ptr<FilterEngine_GPU> filterDX, filterDY;
-};
-
-CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
-
-class CV_EXPORTS ImagePyramid
-{
-public:
-    inline ImagePyramid() : nLayers_(0) {}
-    inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null())
-    {
-        build(img, nLayers, stream);
-    }
-
-    void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null());
-
-    void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const;
-
-    inline void release()
-    {
-        layer0_.release();
-        pyramid_.clear();
-        nLayers_ = 0;
-    }
-
-private:
-    GpuMat layer0_;
-    std::vector<GpuMat> pyramid_;
-    int nLayers_;
-};
-
-//! HoughLines
-
-struct HoughLinesBuf
-{
-    GpuMat accum;
-    GpuMat list;
-};
-
-CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
-CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
-CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
-
-//! HoughLinesP
-
-//! finds line segments in the black-n-white image using probabalistic Hough transform
-CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096);
-
-//! HoughCircles
-
-struct HoughCirclesBuf
-{
-    GpuMat edges;
-    GpuMat accum;
-    GpuMat list;
-    CannyBuf cannyBuf;
-};
-
-CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
-CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
-CV_EXPORTS void HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles);
-
-//! finds arbitrary template in the grayscale image using Generalized Hough Transform
-//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
-//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
-class CV_EXPORTS GeneralizedHough_GPU : public cv::Algorithm
-{
-public:
-    static Ptr<GeneralizedHough_GPU> create(int method);
-
-    virtual ~GeneralizedHough_GPU();
-
-    //! set template to search
-    void setTemplate(const GpuMat& templ, int cannyThreshold = 100, Point templCenter = Point(-1, -1));
-    void setTemplate(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter = Point(-1, -1));
-
-    //! find template on image
-    void detect(const GpuMat& image, GpuMat& positions, int cannyThreshold = 100);
-    void detect(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions);
-
-    void download(const GpuMat& d_positions, OutputArray h_positions, OutputArray h_votes = noArray());
-
-    void release();
-
-protected:
-    virtual void setTemplateImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter) = 0;
-    virtual void detectImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions) = 0;
-    virtual void releaseImpl() = 0;
-
-private:
-    GpuMat edges_;
-    CannyBuf cannyBuf_;
-};
 
 ///////////////////////////// Calibration 3D //////////////////////////////////
 
@@ -351,68 +80,11 @@ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& c
 
 //////////////////////////////// Image Labeling ////////////////////////////////
 
-//!performs labeling via graph cuts of a 2D regular 4-connected graph.
-CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
-                         GpuMat& buf, Stream& stream = Stream::Null());
 
-//!performs labeling via graph cuts of a 2D regular 8-connected graph.
-CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
-                         GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight,
-                         GpuMat& labels,
-                         GpuMat& buf, Stream& stream = Stream::Null());
-
-//! compute mask for Generalized Flood fill componetns labeling.
-CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null());
-
-//! performs connected componnents labeling.
-CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
 
 ////////////////////////////////// Histograms //////////////////////////////////
 
-//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
-CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel);
-//! Calculates histogram with evenly distributed bins for signle channel source.
-//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
-//! Output hist will have one row and histSize cols and CV_32SC1 type.
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
-//! Calculates histogram with evenly distributed bins for four-channel source.
-//! All channels of source are processed separately.
-//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
-//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
-//! Calculates histogram with bins determined by levels array.
-//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
-//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
-//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null());
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null());
-//! Calculates histogram with bins determined by levels array.
-//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
-//! All channels of source are processed separately.
-//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
-//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream = Stream::Null());
 
-//! Calculates histogram for 8u one channel image
-//! Output hist will have one row, 256 cols and CV32SC1 type.
-CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null());
-CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
-
-//! normalizes the grayscale image brightness and contrast by normalizing its histogram
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
-
-class CV_EXPORTS CLAHE : public cv::CLAHE
-{
-public:
-    using cv::CLAHE::apply;
-    virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
-};
-CV_EXPORTS Ptr<cv::gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 
 //////////////////////////////// StereoBM_GPU ////////////////////////////////
 
@@ -1097,52 +769,7 @@ public:
     GpuMat buf;
 };
 
-class CV_EXPORTS GoodFeaturesToTrackDetector_GPU
-{
-public:
-    explicit GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
-        int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
 
-    //! return 1 rows matrix with CV_32FC2 type
-    void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
-
-    int maxCorners;
-    double qualityLevel;
-    double minDistance;
-
-    int blockSize;
-    bool useHarrisDetector;
-    double harrisK;
-
-    void releaseMemory()
-    {
-        Dx_.release();
-        Dy_.release();
-        buf_.release();
-        eig_.release();
-        minMaxbuf_.release();
-        tmpCorners_.release();
-    }
-
-private:
-    GpuMat Dx_;
-    GpuMat Dy_;
-    GpuMat buf_;
-    GpuMat eig_;
-    GpuMat minMaxbuf_;
-    GpuMat tmpCorners_;
-};
-
-inline GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners_, double qualityLevel_, double minDistance_,
-        int blockSize_, bool useHarrisDetector_, double harrisK_)
-{
-    maxCorners = maxCorners_;
-    qualityLevel = qualityLevel_;
-    minDistance = minDistance_;
-    blockSize = blockSize_;
-    useHarrisDetector = useHarrisDetector_;
-    harrisK = harrisK_;
-}
 
 
 class CV_EXPORTS PyrLKOpticalFlow
diff --git a/modules/gpu/src/bilateral_filter.cpp b/modules/gpu/src/disparity_bilateral_filter.cpp
similarity index 100%
rename from modules/gpu/src/bilateral_filter.cpp
rename to modules/gpu/src/disparity_bilateral_filter.cpp
diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp
index 08807d51e..f98f364b9 100644
--- a/modules/gpu/test/test_precomp.hpp
+++ b/modules/gpu/test/test_precomp.hpp
@@ -74,8 +74,6 @@
 #include "opencv2/ts/gpu_test.hpp"
 #include "opencv2/gpu.hpp"
 
-#include "interpolation.hpp"
-
 #include "opencv2/core/gpu_private.hpp"
 
 #endif
diff --git a/modules/gpu/test/test_threshold.cpp b/modules/gpuarithm/test/test_threshold.cpp
similarity index 100%
rename from modules/gpu/test/test_threshold.cpp
rename to modules/gpuarithm/test/test_threshold.cpp
diff --git a/modules/gpuimgproc/CMakeLists.txt b/modules/gpuimgproc/CMakeLists.txt
new file mode 100644
index 000000000..04a31d5e7
--- /dev/null
+++ b/modules/gpuimgproc/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpuimgproc)
+endif()
+
+set(the_description "GPU-accelerated Image Processing")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
+
+ocv_define_module(gpuimgproc opencv_imgproc opencv_gpuarithm opencv_gpufilters OPTIONAL opencv_photo)
diff --git a/modules/gpuimgproc/doc/gpuimgproc.rst b/modules/gpuimgproc/doc/gpuimgproc.rst
new file mode 100644
index 000000000..d4cba96a4
--- /dev/null
+++ b/modules/gpuimgproc/doc/gpuimgproc.rst
@@ -0,0 +1,8 @@
+********************************************
+gpuimgproc. GPU-accelerated Image Processing
+********************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    image_processing
diff --git a/modules/gpu/doc/image_processing.rst b/modules/gpuimgproc/doc/image_processing.rst
similarity index 100%
rename from modules/gpu/doc/image_processing.rst
rename to modules/gpuimgproc/doc/image_processing.rst
diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
new file mode 100644
index 000000000..d602d0a13
--- /dev/null
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
@@ -0,0 +1,441 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUIMGPROC_HPP__
+#define __OPENCV_GPUIMGPROC_HPP__
+
+#ifndef __cplusplus
+#  error gpuimgproc.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/gpumat.hpp"
+#include "opencv2/gpufilters.hpp"
+#include "opencv2/imgproc.hpp"
+
+namespace cv { namespace gpu {
+
+enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
+       ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
+
+//! Composite two images using alpha opacity values contained in each image
+//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
+CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
+
+//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
+//! supports only CV_32FC1 map type
+CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
+                      int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(),
+                      Stream& stream = Stream::Null());
+
+//! Does mean shift filtering on GPU.
+CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
+                                   TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
+                                   Stream& stream = Stream::Null());
+
+//! Does mean shift procedure on GPU.
+CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
+                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
+                              Stream& stream = Stream::Null());
+
+//! Does mean shift segmentation with elimination of small regions.
+CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
+                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+//! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV.
+//! Supported types of input disparity: CV_8U, CV_16S.
+//! Output disparity has CV_8UC4 type in BGRA format (alpha = 255).
+CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null());
+
+//! Reprojects disparity image to 3D space.
+//! Supports CV_8U and CV_16S types of input disparity.
+//! The output is a 3- or 4-channel floating-point matrix.
+//! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map.
+//! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify.
+CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null());
+
+//! converts image from one color space to another
+CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
+
+enum
+{
+    // Bayer Demosaicing (Malvar, He, and Cutler)
+    COLOR_BayerBG2BGR_MHT = 256,
+    COLOR_BayerGB2BGR_MHT = 257,
+    COLOR_BayerRG2BGR_MHT = 258,
+    COLOR_BayerGR2BGR_MHT = 259,
+
+    COLOR_BayerBG2RGB_MHT = COLOR_BayerRG2BGR_MHT,
+    COLOR_BayerGB2RGB_MHT = COLOR_BayerGR2BGR_MHT,
+    COLOR_BayerRG2RGB_MHT = COLOR_BayerBG2BGR_MHT,
+    COLOR_BayerGR2RGB_MHT = COLOR_BayerGB2BGR_MHT,
+
+    COLOR_BayerBG2GRAY_MHT = 260,
+    COLOR_BayerGB2GRAY_MHT = 261,
+    COLOR_BayerRG2GRAY_MHT = 262,
+    COLOR_BayerGR2GRAY_MHT = 263
+};
+CV_EXPORTS void demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn = -1, Stream& stream = Stream::Null());
+
+//! swap channels
+//! dstOrder - Integer array describing how channel values are permuted. The n-th entry
+//!            of the array contains the number of the channel that is stored in the n-th channel of
+//!            the output image. E.g. Given an RGBA image, dstOrder = [3,2,1,0] converts this to ABGR
+//!            channel order.
+CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null());
+
+//! Routines for correcting image color gamma
+CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null());
+
+//! resizes the image
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA
+CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
+
+//! warps the image using affine transformation
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
+    int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
+
+CV_EXPORTS void buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
+
+//! warps the image using perspective transformation
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
+    int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
+
+CV_EXPORTS void buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
+
+//! builds plane warping maps
+CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, float scale,
+                                   GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! builds cylindrical warping maps
+CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
+                                         GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! builds spherical warping maps
+CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
+                                       GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! rotates an image around the origin (0,0) and then shifts it
+//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth
+CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
+                       int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
+
+//! computes Harris cornerness criteria at each image pixel
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
+                             int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
+
+//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
+    int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
+
+struct CV_EXPORTS MatchTemplateBuf
+{
+    Size user_block_size;
+    GpuMat imagef, templf;
+    std::vector<GpuMat> images;
+    std::vector<GpuMat> image_sums;
+    std::vector<GpuMat> image_sqsums;
+};
+
+//! computes the proximity map for the raster template and the image where the template is searched for
+CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null());
+
+//! computes the proximity map for the raster template and the image where the template is searched for
+CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null());
+
+//! smoothes the source image and downsamples it
+CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! upsamples the source image and then smoothes it
+CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! performs linear blending of two images
+//! to avoid accuracy errors sum of weights shouldn't be very close to zero
+CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
+                            GpuMat& result, Stream& stream = Stream::Null());
+
+//! Performs bilateral filtering of the passed image
+CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial,
+                                int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null());
+
+//! Brute force non-local means algorithm (slow but universal)
+CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
+
+//! Fast (but approximate) version of the non-local means algorithm, similar to the CPU function (running sums technique)
+class CV_EXPORTS FastNonLocalMeansDenoising
+{
+public:
+    //! Simple method, recommended for grayscale images (though it supports multichannel images)
+    void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
+
+    //! Processes luminance and color components separately
+    void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
+
+private:
+
+    GpuMat buffer, extended_src_buffer;
+    GpuMat lab, l, ab;
+};
+
+struct CV_EXPORTS CannyBuf
+{
+    void create(const Size& image_size, int apperture_size = 3);
+    void release();
+
+    GpuMat dx, dy;
+    GpuMat mag;
+    GpuMat map;
+    GpuMat st1, st2;
+    Ptr<FilterEngine_GPU> filterDX, filterDY;
+};
+
+CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
+
+class CV_EXPORTS ImagePyramid
+{
+public:
+    inline ImagePyramid() : nLayers_(0) {}
+    inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null())
+    {
+        build(img, nLayers, stream);
+    }
+
+    void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null());
+
+    void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const;
+
+    inline void release()
+    {
+        layer0_.release();
+        pyramid_.clear();
+        nLayers_ = 0;
+    }
+
+private:
+    GpuMat layer0_;
+    std::vector<GpuMat> pyramid_;
+    int nLayers_;
+};
+
+//! HoughLines
+
+struct HoughLinesBuf
+{
+    GpuMat accum;
+    GpuMat list;
+};
+
+CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
+CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
+CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
+
+//! HoughLinesP
+
+//! finds line segments in the black-n-white image using probabilistic Hough transform
+CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096);
+
+//! HoughCircles
+
+struct HoughCirclesBuf
+{
+    GpuMat edges;
+    GpuMat accum;
+    GpuMat list;
+    CannyBuf cannyBuf;
+};
+
+CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+CV_EXPORTS void HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles);
+
+//! finds arbitrary template in the grayscale image using Generalized Hough Transform
+//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
+//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
+class CV_EXPORTS GeneralizedHough_GPU : public cv::Algorithm
+{
+public:
+    static Ptr<GeneralizedHough_GPU> create(int method);
+
+    virtual ~GeneralizedHough_GPU();
+
+    //! set template to search
+    void setTemplate(const GpuMat& templ, int cannyThreshold = 100, Point templCenter = Point(-1, -1));
+    void setTemplate(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter = Point(-1, -1));
+
+    //! find template on image
+    void detect(const GpuMat& image, GpuMat& positions, int cannyThreshold = 100);
+    void detect(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions);
+
+    void download(const GpuMat& d_positions, OutputArray h_positions, OutputArray h_votes = noArray());
+
+    void release();
+
+protected:
+    virtual void setTemplateImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter) = 0;
+    virtual void detectImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions) = 0;
+    virtual void releaseImpl() = 0;
+
+private:
+    GpuMat edges_;
+    CannyBuf cannyBuf_;
+};
+
+//!performs labeling via graph cuts of a 2D regular 4-connected graph.
+CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
+                         GpuMat& buf, Stream& stream = Stream::Null());
+
+//!performs labeling via graph cuts of a 2D regular 8-connected graph.
+CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
+                         GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight,
+                         GpuMat& labels,
+                         GpuMat& buf, Stream& stream = Stream::Null());
+
+//! compute mask for Generalized Flood fill components labeling.
+CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null());
+
+//! performs connected components labeling.
+CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
+
+//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
+CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel);
+//! Calculates histogram with evenly distributed bins for single channel source.
+//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
+//! Output hist will have one row and histSize cols and CV_32SC1 type.
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
+//! Calculates histogram with evenly distributed bins for four-channel source.
+//! All channels of source are processed separately.
+//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
+//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
+//! Calculates histogram with bins determined by levels array.
+//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
+//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
+//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null());
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null());
+//! Calculates histogram with bins determined by levels array.
+//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
+//! All channels of source are processed separately.
+//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
+//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream = Stream::Null());
+
+//! Calculates histogram for 8u one channel image
+//! Output hist will have one row, 256 cols and CV_32SC1 type.
+CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null());
+CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+
+//! normalizes the grayscale image brightness and contrast by normalizing its histogram
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+
+class CV_EXPORTS CLAHE : public cv::CLAHE
+{
+public:
+    using cv::CLAHE::apply;
+    virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
+};
+CV_EXPORTS Ptr<cv::gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+
+class CV_EXPORTS GoodFeaturesToTrackDetector_GPU
+{
+public:
+    explicit GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+        int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
+
+    //! returns a single-row matrix of CV_32FC2 type
+    void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
+
+    int maxCorners;
+    double qualityLevel;
+    double minDistance;
+
+    int blockSize;
+    bool useHarrisDetector;
+    double harrisK;
+
+    void releaseMemory()
+    {
+        Dx_.release();
+        Dy_.release();
+        buf_.release();
+        eig_.release();
+        minMaxbuf_.release();
+        tmpCorners_.release();
+    }
+
+private:
+    GpuMat Dx_;
+    GpuMat Dy_;
+    GpuMat buf_;
+    GpuMat eig_;
+    GpuMat minMaxbuf_;
+    GpuMat tmpCorners_;
+};
+
+inline GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners_, double qualityLevel_, double minDistance_,
+        int blockSize_, bool useHarrisDetector_, double harrisK_)
+    : maxCorners(maxCorners_),
+      qualityLevel(qualityLevel_),
+      minDistance(minDistance_),
+      blockSize(blockSize_),
+      useHarrisDetector(useHarrisDetector_),
+      harrisK(harrisK_)
+{
+}
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUIMGPROC_HPP__ */
diff --git a/modules/gpu/perf/perf_denoising.cpp b/modules/gpuimgproc/perf/perf_denoising.cpp
similarity index 100%
rename from modules/gpu/perf/perf_denoising.cpp
rename to modules/gpuimgproc/perf/perf_denoising.cpp
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpuimgproc/perf/perf_imgproc.cpp
similarity index 97%
rename from modules/gpu/perf/perf_imgproc.cpp
rename to modules/gpuimgproc/perf/perf_imgproc.cpp
index 5f8e9b297..349dcc825 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpuimgproc/perf/perf_imgproc.cpp
@@ -366,7 +366,7 @@ PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold,
 //////////////////////////////////////////////////////////////////////
 // HistEvenC1
 
-PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC1,
+PERF_TEST_P(Sz_Depth, HistEvenC1,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_16S)))
 {
@@ -405,7 +405,7 @@ PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC1,
 //////////////////////////////////////////////////////////////////////
 // HistEvenC4
 
-PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC4,
+PERF_TEST_P(Sz_Depth, HistEvenC4,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_16S)))
 {
@@ -446,7 +446,7 @@ PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC4,
 //////////////////////////////////////////////////////////////////////
 // CalcHist
 
-PERF_TEST_P(Sz, ImgProc_CalcHist,
+PERF_TEST_P(Sz, CalcHist,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -472,7 +472,7 @@ PERF_TEST_P(Sz, ImgProc_CalcHist,
 //////////////////////////////////////////////////////////////////////
 // EqualizeHist
 
-PERF_TEST_P(Sz, ImgProc_EqualizeHist,
+PERF_TEST_P(Sz, EqualizeHist,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -503,7 +503,7 @@ PERF_TEST_P(Sz, ImgProc_EqualizeHist,
 
 DEF_PARAM_TEST(Sz_ClipLimit, cv::Size, double);
 
-PERF_TEST_P(Sz_ClipLimit, ImgProc_CLAHE,
+PERF_TEST_P(Sz_ClipLimit, CLAHE,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(0.0, 40.0)))
 {
@@ -539,7 +539,7 @@ PERF_TEST_P(Sz_ClipLimit, ImgProc_CLAHE,
 
 DEF_PARAM_TEST(Image_AppertureSz_L2gradient, string, int, bool);
 
-PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny,
+PERF_TEST_P(Image_AppertureSz_L2gradient, Canny,
             Combine(Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"),
                     Values(3, 5),
                     Bool()))
@@ -579,7 +579,7 @@ PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny,
 
 DEF_PARAM_TEST_1(Image, string);
 
-PERF_TEST_P(Image, ImgProc_MeanShiftFiltering,
+PERF_TEST_P(Image, MeanShiftFiltering,
             Values<string>("gpu/meanshift/cones.png"))
 {
     declare.time(300.0);
@@ -615,7 +615,7 @@ PERF_TEST_P(Image, ImgProc_MeanShiftFiltering,
 //////////////////////////////////////////////////////////////////////
 // MeanShiftProc
 
-PERF_TEST_P(Image, ImgProc_MeanShiftProc,
+PERF_TEST_P(Image, MeanShiftProc,
             Values<string>("gpu/meanshift/cones.png"))
 {
     declare.time(300.0);
@@ -649,7 +649,7 @@ PERF_TEST_P(Image, ImgProc_MeanShiftProc,
 //////////////////////////////////////////////////////////////////////
 // MeanShiftSegmentation
 
-PERF_TEST_P(Image, ImgProc_MeanShiftSegmentation,
+PERF_TEST_P(Image, MeanShiftSegmentation,
             Values<string>("gpu/meanshift/cones.png"))
 {
     declare.time(300.0);
@@ -682,7 +682,7 @@ PERF_TEST_P(Image, ImgProc_MeanShiftSegmentation,
 //////////////////////////////////////////////////////////////////////
 // BlendLinear
 
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_BlendLinear,
+PERF_TEST_P(Sz_Depth_Cn, BlendLinear,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_32F),
                     GPU_CHANNELS_1_3_4))
@@ -725,7 +725,7 @@ CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED,
 
 DEF_PARAM_TEST(Sz_TemplateSz_Cn_Method, cv::Size, cv::Size, MatCn, TemplateMethod);
 
-PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate8U,
+PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate8U,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
                     GPU_CHANNELS_1_3_4,
@@ -765,7 +765,7 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate8U,
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplate32F
 
-PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate32F,
+PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate32F,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
                     GPU_CHANNELS_1_3_4,
@@ -807,7 +807,7 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate32F,
 
 DEF_PARAM_TEST(Image_Type_Border_BlockSz_ApertureSz, string, MatType, BorderMode, int, int);
 
-PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerHarris,
+PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerHarris,
             Combine(Values<string>("gpu/stereobm/aloe-L.png"),
                     Values(CV_8UC1, CV_32FC1),
                     Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
@@ -852,7 +852,7 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerHarris,
 //////////////////////////////////////////////////////////////////////
 // CornerMinEigenVal
 
-PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerMinEigenVal,
+PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerMinEigenVal,
             Combine(Values<string>("gpu/stereobm/aloe-L.png"),
                     Values(CV_8UC1, CV_32FC1),
                     Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
@@ -1087,7 +1087,7 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp,
 
 DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CvtColorInfo);
 
-PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColor,
+PERF_TEST_P(Sz_Depth_Code, CvtColor,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_32F),
                     Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
@@ -1138,7 +1138,7 @@ PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColor,
     }
 }
 
-PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColorBayer,
+PERF_TEST_P(Sz_Depth_Code, CvtColorBayer,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U),
                     Values(CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR),
@@ -1185,7 +1185,7 @@ CV_ENUM(DemosaicingCode,
 
 DEF_PARAM_TEST(Sz_Code, cv::Size, DemosaicingCode);
 
-PERF_TEST_P(Sz_Code, ImgProc_Demosaicing,
+PERF_TEST_P(Sz_Code, Demosaicing,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     DemosaicingCode::all()))
 {
@@ -1224,7 +1224,7 @@ PERF_TEST_P(Sz_Code, ImgProc_Demosaicing,
 //////////////////////////////////////////////////////////////////////
 // SwapChannels
 
-PERF_TEST_P(Sz, ImgProc_SwapChannels,
+PERF_TEST_P(Sz, SwapChannels,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -1255,7 +1255,7 @@ CV_ENUM(AlphaOp, ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_P
 
 DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, AlphaOp);
 
-PERF_TEST_P(Sz_Type_Op, ImgProc_AlphaComp,
+PERF_TEST_P(Sz_Type_Op, AlphaComp,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
                     AlphaOp::all()))
@@ -1394,7 +1394,7 @@ namespace
     };
 }
 
-PERF_TEST_P(Sz, ImgProc_HoughLines,
+PERF_TEST_P(Sz, HoughLines,
             GPU_TYPICAL_MAT_SIZES)
 {
     declare.time(30.0);
@@ -1442,7 +1442,7 @@ PERF_TEST_P(Sz, ImgProc_HoughLines,
 
 DEF_PARAM_TEST_1(Image, std::string);
 
-PERF_TEST_P(Image, ImgProc_HoughLinesP,
+PERF_TEST_P(Image, HoughLinesP,
             testing::Values("cv/shared/pic5.png", "stitching/a1.png"))
 {
     declare.time(30.0);
@@ -1490,7 +1490,7 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP,
 
 DEF_PARAM_TEST(Sz_Dp_MinDist, cv::Size, float, float);
 
-PERF_TEST_P(Sz_Dp_MinDist, ImgProc_HoughCircles,
+PERF_TEST_P(Sz_Dp_MinDist, HoughCircles,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(1.0f, 2.0f, 4.0f),
                     Values(1.0f)))
@@ -1547,7 +1547,7 @@ CV_FLAGS(GHMethod, GHT_POSITION, GHT_SCALE, GHT_ROTATION);
 
 DEF_PARAM_TEST(Method_Sz, GHMethod, cv::Size);
 
-PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough,
+PERF_TEST_P(Method_Sz, GeneralizedHough,
             Combine(Values(GHMethod(GHT_POSITION), GHMethod(GHT_POSITION | GHT_SCALE), GHMethod(GHT_POSITION | GHT_ROTATION), GHMethod(GHT_POSITION | GHT_SCALE | GHT_ROTATION)),
                     GPU_TYPICAL_MAT_SIZES))
 {
diff --git a/modules/gpu/perf/perf_labeling.cpp b/modules/gpuimgproc/perf/perf_labeling.cpp
similarity index 100%
rename from modules/gpu/perf/perf_labeling.cpp
rename to modules/gpuimgproc/perf/perf_labeling.cpp
diff --git a/modules/gpuimgproc/perf/perf_main.cpp b/modules/gpuimgproc/perf/perf_main.cpp
new file mode 100644
index 000000000..6b3bec5f8
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf; // presumably lets printCudaInfo below resolve unqualified; confirm against opencv2/ts/gpu_perf.hpp
+
+CV_PERF_TEST_MAIN(gpuimgproc, printCudaInfo()) // first argument names this module; the printCudaInfo() call is handed to the macro as its extra argument
diff --git a/modules/gpuimgproc/perf/perf_precomp.cpp b/modules/gpuimgproc/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpuimgproc/perf/perf_precomp.hpp b/modules/gpuimgproc/perf/perf_precomp.hpp
new file mode 100644
index 000000000..6ecb958f4
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_precomp.hpp
@@ -0,0 +1,66 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__ // the diagnostic suppressions below are only issued for compilers that define __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif // __clang__ || __APPLE__
+#endif // __GNUC__
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__ // include guard; note the pragmas above sit outside it
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpuimgproc.hpp" // public header of the module these perf tests live in
+#include "opencv2/imgproc.hpp"
+
+#include "opencv2/photo.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif // GTEST_CREATE_SHARED_LIBRARY
+
+#endif // __OPENCV_PERF_PRECOMP_HPP__
diff --git a/modules/gpu/src/blend.cpp b/modules/gpuimgproc/src/blend.cpp
similarity index 100%
rename from modules/gpu/src/blend.cpp
rename to modules/gpuimgproc/src/blend.cpp
diff --git a/modules/gpu/src/color.cpp b/modules/gpuimgproc/src/color.cpp
similarity index 100%
rename from modules/gpu/src/color.cpp
rename to modules/gpuimgproc/src/color.cpp
diff --git a/modules/gpu/src/cuda/bilateral_filter.cu b/modules/gpuimgproc/src/cuda/bilateral_filter.cu
similarity index 100%
rename from modules/gpu/src/cuda/bilateral_filter.cu
rename to modules/gpuimgproc/src/cuda/bilateral_filter.cu
diff --git a/modules/gpu/src/cuda/blend.cu b/modules/gpuimgproc/src/cuda/blend.cu
similarity index 100%
rename from modules/gpu/src/cuda/blend.cu
rename to modules/gpuimgproc/src/cuda/blend.cu
diff --git a/modules/gpu/src/cuda/canny.cu b/modules/gpuimgproc/src/cuda/canny.cu
similarity index 100%
rename from modules/gpu/src/cuda/canny.cu
rename to modules/gpuimgproc/src/cuda/canny.cu
diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpuimgproc/src/cuda/ccomponetns.cu
similarity index 100%
rename from modules/gpu/src/cuda/ccomponetns.cu
rename to modules/gpuimgproc/src/cuda/ccomponetns.cu
diff --git a/modules/gpu/src/cuda/clahe.cu b/modules/gpuimgproc/src/cuda/clahe.cu
similarity index 100%
rename from modules/gpu/src/cuda/clahe.cu
rename to modules/gpuimgproc/src/cuda/clahe.cu
diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpuimgproc/src/cuda/color.cu
similarity index 100%
rename from modules/gpu/src/cuda/color.cu
rename to modules/gpuimgproc/src/cuda/color.cu
diff --git a/modules/gpu/src/cuda/debayer.cu b/modules/gpuimgproc/src/cuda/debayer.cu
similarity index 100%
rename from modules/gpu/src/cuda/debayer.cu
rename to modules/gpuimgproc/src/cuda/debayer.cu
diff --git a/modules/gpu/src/cuda/gftt.cu b/modules/gpuimgproc/src/cuda/gftt.cu
similarity index 100%
rename from modules/gpu/src/cuda/gftt.cu
rename to modules/gpuimgproc/src/cuda/gftt.cu
diff --git a/modules/gpu/src/cuda/hist.cu b/modules/gpuimgproc/src/cuda/hist.cu
similarity index 100%
rename from modules/gpu/src/cuda/hist.cu
rename to modules/gpuimgproc/src/cuda/hist.cu
diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpuimgproc/src/cuda/hough.cu
similarity index 100%
rename from modules/gpu/src/cuda/hough.cu
rename to modules/gpuimgproc/src/cuda/hough.cu
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpuimgproc/src/cuda/imgproc.cu
similarity index 99%
rename from modules/gpu/src/cuda/imgproc.cu
rename to modules/gpuimgproc/src/cuda/imgproc.cu
index 01cfae4cb..c6dfbb417 100644
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpuimgproc/src/cuda/imgproc.cu
@@ -47,7 +47,6 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
-#include "internal_shared.hpp"
 
 namespace cv { namespace gpu { namespace cudev
 {
diff --git a/modules/gpu/src/cuda/match_template.cu b/modules/gpuimgproc/src/cuda/match_template.cu
similarity index 100%
rename from modules/gpu/src/cuda/match_template.cu
rename to modules/gpuimgproc/src/cuda/match_template.cu
diff --git a/modules/gpu/src/cuda/nlm.cu b/modules/gpuimgproc/src/cuda/nlm.cu
similarity index 100%
rename from modules/gpu/src/cuda/nlm.cu
rename to modules/gpuimgproc/src/cuda/nlm.cu
diff --git a/modules/gpu/src/cuda/pyr_down.cu b/modules/gpuimgproc/src/cuda/pyr_down.cu
similarity index 100%
rename from modules/gpu/src/cuda/pyr_down.cu
rename to modules/gpuimgproc/src/cuda/pyr_down.cu
diff --git a/modules/gpu/src/cuda/pyr_up.cu b/modules/gpuimgproc/src/cuda/pyr_up.cu
similarity index 100%
rename from modules/gpu/src/cuda/pyr_up.cu
rename to modules/gpuimgproc/src/cuda/pyr_up.cu
diff --git a/modules/gpu/src/cuda/remap.cu b/modules/gpuimgproc/src/cuda/remap.cu
similarity index 100%
rename from modules/gpu/src/cuda/remap.cu
rename to modules/gpuimgproc/src/cuda/remap.cu
diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpuimgproc/src/cuda/resize.cu
similarity index 100%
rename from modules/gpu/src/cuda/resize.cu
rename to modules/gpuimgproc/src/cuda/resize.cu
diff --git a/modules/gpu/src/cuda/warp.cu b/modules/gpuimgproc/src/cuda/warp.cu
similarity index 100%
rename from modules/gpu/src/cuda/warp.cu
rename to modules/gpuimgproc/src/cuda/warp.cu
diff --git a/modules/gpu/src/cvt_color_internal.h b/modules/gpuimgproc/src/cvt_color_internal.h
similarity index 100%
rename from modules/gpu/src/cvt_color_internal.h
rename to modules/gpuimgproc/src/cvt_color_internal.h
diff --git a/modules/gpu/src/denoising.cpp b/modules/gpuimgproc/src/denoising.cpp
similarity index 100%
rename from modules/gpu/src/denoising.cpp
rename to modules/gpuimgproc/src/denoising.cpp
diff --git a/modules/gpu/src/gftt.cpp b/modules/gpuimgproc/src/gftt.cpp
similarity index 100%
rename from modules/gpu/src/gftt.cpp
rename to modules/gpuimgproc/src/gftt.cpp
diff --git a/modules/gpu/src/graphcuts.cpp b/modules/gpuimgproc/src/graphcuts.cpp
similarity index 100%
rename from modules/gpu/src/graphcuts.cpp
rename to modules/gpuimgproc/src/graphcuts.cpp
diff --git a/modules/gpu/src/hough.cpp b/modules/gpuimgproc/src/hough.cpp
similarity index 100%
rename from modules/gpu/src/hough.cpp
rename to modules/gpuimgproc/src/hough.cpp
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpuimgproc/src/imgproc.cpp
similarity index 100%
rename from modules/gpu/src/imgproc.cpp
rename to modules/gpuimgproc/src/imgproc.cpp
diff --git a/modules/gpu/src/match_template.cpp b/modules/gpuimgproc/src/match_template.cpp
similarity index 100%
rename from modules/gpu/src/match_template.cpp
rename to modules/gpuimgproc/src/match_template.cpp
diff --git a/modules/gpu/src/mssegmentation.cpp b/modules/gpuimgproc/src/mssegmentation.cpp
similarity index 100%
rename from modules/gpu/src/mssegmentation.cpp
rename to modules/gpuimgproc/src/mssegmentation.cpp
diff --git a/modules/gpuimgproc/src/precomp.cpp b/modules/gpuimgproc/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpuimgproc/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpuimgproc/src/precomp.hpp b/modules/gpuimgproc/src/precomp.hpp
new file mode 100644
index 000000000..7df02aadd
--- /dev/null
+++ b/modules/gpuimgproc/src/precomp.hpp
@@ -0,0 +1,53 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__ // include guard for the gpuimgproc precompiled header
+#define __OPENCV_PRECOMP_H__
+
+#include "opencv2/gpufilters.hpp" // sibling GPU module; presumably needed by sources moved here from modules/gpu (confirm)
+#include "opencv2/gpuarithm.hpp" // sibling GPU module; presumably needed by sources moved here from modules/gpu (confirm)
+#include "opencv2/gpuimgproc.hpp" // this module's own public header
+
+#include "opencv2/core/private.hpp" // NOTE(review): name suggests non-public core API; confirm it is not installed
+#include "opencv2/core/gpu_private.hpp" // NOTE(review): name suggests non-public GPU helpers; confirm it is not installed
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/src/pyramids.cpp b/modules/gpuimgproc/src/pyramids.cpp
similarity index 100%
rename from modules/gpu/src/pyramids.cpp
rename to modules/gpuimgproc/src/pyramids.cpp
diff --git a/modules/gpu/src/remap.cpp b/modules/gpuimgproc/src/remap.cpp
similarity index 100%
rename from modules/gpu/src/remap.cpp
rename to modules/gpuimgproc/src/remap.cpp
diff --git a/modules/gpu/src/resize.cpp b/modules/gpuimgproc/src/resize.cpp
similarity index 100%
rename from modules/gpu/src/resize.cpp
rename to modules/gpuimgproc/src/resize.cpp
diff --git a/modules/gpu/src/warp.cpp b/modules/gpuimgproc/src/warp.cpp
similarity index 100%
rename from modules/gpu/src/warp.cpp
rename to modules/gpuimgproc/src/warp.cpp
diff --git a/modules/gpu/test/interpolation.hpp b/modules/gpuimgproc/test/interpolation.hpp
similarity index 100%
rename from modules/gpu/test/interpolation.hpp
rename to modules/gpuimgproc/test/interpolation.hpp
diff --git a/modules/gpu/test/test_color.cpp b/modules/gpuimgproc/test/test_color.cpp
similarity index 100%
rename from modules/gpu/test/test_color.cpp
rename to modules/gpuimgproc/test/test_color.cpp
diff --git a/modules/gpu/test/test_denoising.cpp b/modules/gpuimgproc/test/test_denoising.cpp
similarity index 100%
rename from modules/gpu/test/test_denoising.cpp
rename to modules/gpuimgproc/test/test_denoising.cpp
diff --git a/modules/gpu/test/test_hough.cpp b/modules/gpuimgproc/test/test_hough.cpp
similarity index 100%
rename from modules/gpu/test/test_hough.cpp
rename to modules/gpuimgproc/test/test_hough.cpp
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpuimgproc/test/test_imgproc.cpp
similarity index 100%
rename from modules/gpu/test/test_imgproc.cpp
rename to modules/gpuimgproc/test/test_imgproc.cpp
diff --git a/modules/gpu/test/test_labeling.cpp b/modules/gpuimgproc/test/test_labeling.cpp
similarity index 100%
rename from modules/gpu/test/test_labeling.cpp
rename to modules/gpuimgproc/test/test_labeling.cpp
diff --git a/modules/gpuimgproc/test/test_main.cpp b/modules/gpuimgproc/test/test_main.cpp
new file mode 100644
index 000000000..eea3d7c00
--- /dev/null
+++ b/modules/gpuimgproc/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_GPU_TEST_MAIN("gpu") // NOTE(review): argument is still "gpu" although this is the gpuimgproc module; presumably the shared test-data subdirectory, confirm
diff --git a/modules/gpuimgproc/test/test_precomp.cpp b/modules/gpuimgproc/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpuimgproc/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpuimgproc/test/test_precomp.hpp b/modules/gpuimgproc/test/test_precomp.hpp
new file mode 100644
index 000000000..a80f5e5f4
--- /dev/null
+++ b/modules/gpuimgproc/test/test_precomp.hpp
@@ -0,0 +1,63 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__ // the diagnostic suppressions below are only issued for compilers that define __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif // __clang__ || __APPLE__
+#endif // __GNUC__
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__ // include guard; note the pragmas above sit outside it
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpuimgproc.hpp" // public header of the module these tests live in
+#include "opencv2/gpuarithm.hpp"
+#include "opencv2/imgproc.hpp"
+
+#include "interpolation.hpp" // local test helper; this patch moves it from modules/gpu/test into this directory
+
+#endif // __OPENCV_TEST_PRECOMP_HPP__
diff --git a/modules/gpu/test/test_pyramids.cpp b/modules/gpuimgproc/test/test_pyramids.cpp
similarity index 100%
rename from modules/gpu/test/test_pyramids.cpp
rename to modules/gpuimgproc/test/test_pyramids.cpp
diff --git a/modules/gpu/test/test_remap.cpp b/modules/gpuimgproc/test/test_remap.cpp
similarity index 100%
rename from modules/gpu/test/test_remap.cpp
rename to modules/gpuimgproc/test/test_remap.cpp
diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpuimgproc/test/test_resize.cpp
similarity index 100%
rename from modules/gpu/test/test_resize.cpp
rename to modules/gpuimgproc/test/test_resize.cpp
diff --git a/modules/gpu/test/test_warp_affine.cpp b/modules/gpuimgproc/test/test_warp_affine.cpp
similarity index 100%
rename from modules/gpu/test/test_warp_affine.cpp
rename to modules/gpuimgproc/test/test_warp_affine.cpp
diff --git a/modules/gpu/test/test_warp_perspective.cpp b/modules/gpuimgproc/test/test_warp_perspective.cpp
similarity index 100%
rename from modules/gpu/test/test_warp_perspective.cpp
rename to modules/gpuimgproc/test/test_warp_perspective.cpp
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index 4678532af..bb444f070 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -19,6 +19,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
   if(HAVE_opencv_gpu)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuarithm/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufilters/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")  # headers of the gpuimgproc module split out of modules/gpu
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 3b0555366..57fdeb093 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia)
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 

From 7544ddbfefe8ca2b8650ab1198d5da611482cd02 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:26:16 +0400
Subject: [PATCH 15/49] gpufeatures2d module for 2d feature detection and
 matching

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/doc/gpu.rst                       |   1 -
 modules/gpu/include/opencv2/gpu.hpp           | 299 +--------------
 modules/gpufeatures2d/CMakeLists.txt          |   9 +
 .../doc/feature_detection_and_description.rst |   0
 modules/gpufeatures2d/doc/gpufeatures2d.rst   |   8 +
 .../include/opencv2/gpufeatures2d.hpp         | 361 ++++++++++++++++++
 .../perf/perf_features2d.cpp                  |  10 +-
 modules/gpufeatures2d/perf/perf_main.cpp      |  47 +++
 modules/gpufeatures2d/perf/perf_precomp.cpp   |  43 +++
 modules/gpufeatures2d/perf/perf_precomp.hpp   |  64 ++++
 .../src/brute_force_matcher.cpp               |   0
 .../src/cuda/bf_knnmatch.cu                   |   0
 .../src/cuda/bf_match.cu                      |   0
 .../src/cuda/bf_radius_match.cu               |   0
 .../{gpu => gpufeatures2d}/src/cuda/fast.cu   |   0
 .../{gpu => gpufeatures2d}/src/cuda/orb.cu    |   0
 modules/{gpu => gpufeatures2d}/src/fast.cpp   |   0
 modules/{gpu => gpufeatures2d}/src/orb.cpp    |  12 +-
 modules/gpufeatures2d/src/precomp.cpp         |  43 +++
 modules/gpufeatures2d/src/precomp.hpp         |  58 +++
 .../test/test_features2d.cpp                  |   0
 modules/gpufeatures2d/test/test_main.cpp      |  45 +++
 modules/gpufeatures2d/test/test_precomp.cpp   |  43 +++
 modules/gpufeatures2d/test/test_precomp.hpp   |  60 +++
 samples/cpp/CMakeLists.txt                    |   1 +
 samples/gpu/CMakeLists.txt                    |   2 +-
 27 files changed, 796 insertions(+), 312 deletions(-)
 create mode 100644 modules/gpufeatures2d/CMakeLists.txt
 rename modules/{gpu => gpufeatures2d}/doc/feature_detection_and_description.rst (100%)
 create mode 100644 modules/gpufeatures2d/doc/gpufeatures2d.rst
 create mode 100644 modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp
 rename modules/{gpu => gpufeatures2d}/perf/perf_features2d.cpp (97%)
 create mode 100644 modules/gpufeatures2d/perf/perf_main.cpp
 create mode 100644 modules/gpufeatures2d/perf/perf_precomp.cpp
 create mode 100644 modules/gpufeatures2d/perf/perf_precomp.hpp
 rename modules/{gpu => gpufeatures2d}/src/brute_force_matcher.cpp (100%)
 rename modules/{gpu => gpufeatures2d}/src/cuda/bf_knnmatch.cu (100%)
 rename modules/{gpu => gpufeatures2d}/src/cuda/bf_match.cu (100%)
 rename modules/{gpu => gpufeatures2d}/src/cuda/bf_radius_match.cu (100%)
 rename modules/{gpu => gpufeatures2d}/src/cuda/fast.cu (100%)
 rename modules/{gpu => gpufeatures2d}/src/cuda/orb.cu (100%)
 rename modules/{gpu => gpufeatures2d}/src/fast.cpp (100%)
 rename modules/{gpu => gpufeatures2d}/src/orb.cpp (98%)
 create mode 100644 modules/gpufeatures2d/src/precomp.cpp
 create mode 100644 modules/gpufeatures2d/src/precomp.hpp
 rename modules/{gpu => gpufeatures2d}/test/test_features2d.cpp (100%)
 create mode 100644 modules/gpufeatures2d/test/test_main.cpp
 create mode 100644 modules/gpufeatures2d/test/test_precomp.cpp
 create mode 100644 modules/gpufeatures2d/test/test_precomp.hpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index ee66608a2..fffc9bccc 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -4,7 +4,7 @@ endif()
 
 set(the_description "GPU-accelerated Computer Vision")
 
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc OPTIONAL opencv_gpunvidia)
+ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d OPTIONAL opencv_gpunvidia)
 
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
 
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index 6c082ccd1..68d7fc21e 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -9,6 +9,5 @@ gpu. GPU-accelerated Computer Vision
     initalization_and_information
     data_structures
     object_detection
-    feature_detection_and_description
     camera_calibration_and_3d_reconstruction
     video
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 739732123..db8e224e9 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -53,6 +53,7 @@
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpufilters.hpp"
 #include "opencv2/gpuimgproc.hpp"
+#include "opencv2/gpufeatures2d.hpp"
 
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect.hpp"
@@ -368,156 +369,7 @@ protected:
 
 ////////////////////////////////// BruteForceMatcher //////////////////////////////////
 
-class CV_EXPORTS BFMatcher_GPU
-{
-public:
-    explicit BFMatcher_GPU(int norm = cv::NORM_L2);
 
-    // Add descriptors to train descriptor collection
-    void add(const std::vector<GpuMat>& descCollection);
-
-    // Get train descriptors collection
-    const std::vector<GpuMat>& getTrainDescriptors() const;
-
-    // Clear train descriptors collection
-    void clear();
-
-    // Return true if there are not train descriptors in collection
-    bool empty() const;
-
-    // Return true if the matcher supports mask in match methods
-    bool isMaskSupported() const;
-
-    // Find one best match for each query descriptor
-    void matchSingle(const GpuMat& query, const GpuMat& train,
-        GpuMat& trainIdx, GpuMat& distance,
-        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
-
-    // Download trainIdx and distance and convert it to CPU vector with DMatch
-    static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
-    // Convert trainIdx and distance to vector with DMatch
-    static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
-
-    // Find one best match for each query descriptor
-    void match(const GpuMat& query, const GpuMat& train, std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());
-
-    // Make gpu collection of trains and masks in suitable format for matchCollection function
-    void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
-
-    // Find one best match from train collection for each query descriptor
-    void matchCollection(const GpuMat& query, const GpuMat& trainCollection,
-        GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-        const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null());
-
-    // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
-    static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches);
-    // Convert trainIdx, imgIdx and distance to vector with DMatch
-    static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);
-
-    // Find one best match from train collection for each query descriptor.
-    void match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
-
-    // Find k best matches for each query descriptor (in increasing order of distances)
-    void knnMatchSingle(const GpuMat& query, const GpuMat& train,
-        GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
-        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
-
-    // Download trainIdx and distance and convert it to vector with DMatch
-    // compactResult is used when mask is not empty. If compactResult is false matches
-    // vector will have the same size as queryDescriptors rows. If compactResult is true
-    // matches vector will not contain matches for fully masked out query descriptors.
-    static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    // Convert trainIdx and distance to vector with DMatch
-    static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-    // Find k best matches for each query descriptor (in increasing order of distances).
-    // compactResult is used when mask is not empty. If compactResult is false matches
-    // vector will have the same size as queryDescriptors rows. If compactResult is true
-    // matches vector will not contain matches for fully masked out query descriptors.
-    void knnMatch(const GpuMat& query, const GpuMat& train,
-        std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask = GpuMat(),
-        bool compactResult = false);
-
-    // Find k best matches from train collection for each query descriptor (in increasing order of distances)
-    void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
-        GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-        const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null());
-
-    // Download trainIdx and distance and convert it to vector with DMatch
-    // compactResult is used when mask is not empty. If compactResult is false matches
-    // vector will have the same size as queryDescriptors rows. If compactResult is true
-    // matches vector will not contain matches for fully masked out query descriptors.
-    static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    // Convert trainIdx and distance to vector with DMatch
-    static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-    // Find k best matches  for each query descriptor (in increasing order of distances).
-    // compactResult is used when mask is not empty. If compactResult is false matches
-    // vector will have the same size as queryDescriptors rows. If compactResult is true
-    // matches vector will not contain matches for fully masked out query descriptors.
-    void knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
-        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
-
-    // Find best matches for each query descriptor which have distance less than maxDistance.
-    // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
-    // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
-    // because it didn't have enough memory.
-    // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
-    // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
-    // Matches doesn't sorted.
-    void radiusMatchSingle(const GpuMat& query, const GpuMat& train,
-        GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
-        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
-
-    // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
-    // matches will be sorted in increasing order of distances.
-    // compactResult is used when mask is not empty. If compactResult is false matches
-    // vector will have the same size as queryDescriptors rows. If compactResult is true
-    // matches vector will not contain matches for fully masked out query descriptors.
-    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    // Convert trainIdx, nMatches and distance to vector with DMatch.
-    static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-    // Find best matches for each query descriptor which have distance less than maxDistance
-    // in increasing order of distances).
-    void radiusMatch(const GpuMat& query, const GpuMat& train,
-        std::vector< std::vector<DMatch> >& matches, float maxDistance,
-        const GpuMat& mask = GpuMat(), bool compactResult = false);
-
-    // Find best matches for each query descriptor which have distance less than maxDistance.
-    // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
-    // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
-    // Matches doesn't sorted.
-    void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
-        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), Stream& stream = Stream::Null());
-
-    // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
-    // matches will be sorted in increasing order of distances.
-    // compactResult is used when mask is not empty. If compactResult is false matches
-    // vector will have the same size as queryDescriptors rows. If compactResult is true
-    // matches vector will not contain matches for fully masked out query descriptors.
-    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    // Convert trainIdx, nMatches and distance to vector with DMatch.
-    static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-    // Find best matches from train collection for each query descriptor which have distance less than
-    // maxDistance (in increasing order of distances).
-    void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
-        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
-
-    int norm;
-
-private:
-    std::vector<GpuMat> trainDescCollection;
-};
 
 template <class Distance>
 class CV_EXPORTS BruteForceMatcher_GPU;
@@ -575,160 +427,11 @@ private:
 
 ////////////////////////////////// FAST //////////////////////////////////////////
 
-class CV_EXPORTS FAST_GPU
-{
-public:
-    enum
-    {
-        LOCATION_ROW = 0,
-        RESPONSE_ROW,
-        ROWS_COUNT
-    };
 
-    // all features have same size
-    static const int FEATURE_SIZE = 7;
-
-    explicit FAST_GPU(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05);
-
-    //! finds the keypoints using FAST detector
-    //! supports only CV_8UC1 images
-    void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-    void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-
-    //! download keypoints from device to host memory
-    static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    //! convert keypoints to KeyPoint vector
-    static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
-
-    //! release temporary buffer's memory
-    void release();
-
-    bool nonmaxSupression;
-
-    int threshold;
-
-    //! max keypoints = keypointsRatio * img.size().area()
-    double keypointsRatio;
-
-    //! find keypoints and compute it's response if nonmaxSupression is true
-    //! return count of detected keypoints
-    int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);
-
-    //! get final array of keypoints
-    //! performs nonmax supression if needed
-    //! return final count of keypoints
-    int getKeyPoints(GpuMat& keypoints);
-
-private:
-    GpuMat kpLoc_;
-    int count_;
-
-    GpuMat score_;
-
-    GpuMat d_keypoints_;
-};
 
 ////////////////////////////////// ORB //////////////////////////////////////////
 
-class CV_EXPORTS ORB_GPU
-{
-public:
-    enum
-    {
-        X_ROW = 0,
-        Y_ROW,
-        RESPONSE_ROW,
-        ANGLE_ROW,
-        OCTAVE_ROW,
-        SIZE_ROW,
-        ROWS_COUNT
-    };
 
-    enum
-    {
-        DEFAULT_FAST_THRESHOLD = 20
-    };
-
-    //! Constructor
-    explicit ORB_GPU(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
-                     int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
-
-    //! Compute the ORB features on an image
-    //! image - the image to compute the features (supports only CV_8UC1 images)
-    //! mask - the mask to apply
-    //! keypoints - the resulting keypoints
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-
-    //! Compute the ORB features and descriptors on an image
-    //! image - the image to compute the features (supports only CV_8UC1 images)
-    //! mask - the mask to apply
-    //! keypoints - the resulting keypoints
-    //! descriptors - descriptors array
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
-
-    //! download keypoints from device to host memory
-    static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-    //! convert keypoints to KeyPoint vector
-    static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    //! returns the descriptor size in bytes
-    inline int descriptorSize() const { return kBytes; }
-
-    inline void setFastParams(int threshold, bool nonmaxSupression = true)
-    {
-        fastDetector_.threshold = threshold;
-        fastDetector_.nonmaxSupression = nonmaxSupression;
-    }
-
-    //! release temporary buffer's memory
-    void release();
-
-    //! if true, image will be blurred before descriptors calculation
-    bool blurForDescriptor;
-
-private:
-    enum { kBytes = 32 };
-
-    void buildScalePyramids(const GpuMat& image, const GpuMat& mask);
-
-    void computeKeyPointsPyramid();
-
-    void computeDescriptors(GpuMat& descriptors);
-
-    void mergeKeyPoints(GpuMat& keypoints);
-
-    int nFeatures_;
-    float scaleFactor_;
-    int nLevels_;
-    int edgeThreshold_;
-    int firstLevel_;
-    int WTA_K_;
-    int scoreType_;
-    int patchSize_;
-
-    // The number of desired features per scale
-    std::vector<size_t> n_features_per_level_;
-
-    // Points to compute BRIEF descriptors from
-    GpuMat pattern_;
-
-    std::vector<GpuMat> imagePyr_;
-    std::vector<GpuMat> maskPyr_;
-
-    GpuMat buf_;
-
-    std::vector<GpuMat> keyPointsPyr_;
-    std::vector<int> keyPointsCount_;
-
-    FAST_GPU fastDetector_;
-
-    Ptr<FilterEngine_GPU> blurFilter;
-
-    GpuMat d_keypoints_;
-};
 
 ////////////////////////////////// Optical Flow //////////////////////////////////////////
 
diff --git a/modules/gpufeatures2d/CMakeLists.txt b/modules/gpufeatures2d/CMakeLists.txt
new file mode 100644
index 000000000..4a93be34a
--- /dev/null
+++ b/modules/gpufeatures2d/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)  # this module is not built for Android or iOS targets
+  ocv_module_disable(gpufeatures2d)  # OpenCV helper; presumably also stops processing of this file - TODO confirm
+endif()
+
+set(the_description "GPU-accelerated Feature Detection and Description")  # module summary text; presumably picked up by ocv_define_module - TODO confirm
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)  # switch off the listed MSVC (/wdNNNN) and GCC-style (-W...) warnings for the C++ flags
+
+ocv_define_module(gpufeatures2d opencv_features2d opencv_gpufilters opencv_gpuimgproc)  # declare the module together with the three OpenCV modules it depends on
diff --git a/modules/gpu/doc/feature_detection_and_description.rst b/modules/gpufeatures2d/doc/feature_detection_and_description.rst
similarity index 100%
rename from modules/gpu/doc/feature_detection_and_description.rst
rename to modules/gpufeatures2d/doc/feature_detection_and_description.rst
diff --git a/modules/gpufeatures2d/doc/gpufeatures2d.rst b/modules/gpufeatures2d/doc/gpufeatures2d.rst
new file mode 100644
index 000000000..5679b1d8b
--- /dev/null
+++ b/modules/gpufeatures2d/doc/gpufeatures2d.rst
@@ -0,0 +1,8 @@
+****************************************************************
+gpufeatures2d. GPU-accelerated Feature Detection and Description
+****************************************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    feature_detection_and_description
diff --git a/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp b/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp
new file mode 100644
index 000000000..08313b1fc
--- /dev/null
+++ b/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp
@@ -0,0 +1,361 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUFEATURES2D_HPP__
+#define __OPENCV_GPUFEATURES2D_HPP__
+
+#ifndef __cplusplus
+#  error gpufeatures2d.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/gpumat.hpp"
+#include "opencv2/gpufilters.hpp"
+
+namespace cv { namespace gpu {
+
+class CV_EXPORTS BFMatcher_GPU
+{
+public:
+    explicit BFMatcher_GPU(int norm = cv::NORM_L2);
+
+    // Add descriptors to train descriptor collection
+    void add(const std::vector<GpuMat>& descCollection);
+
+    // Get train descriptors collection
+    const std::vector<GpuMat>& getTrainDescriptors() const;
+
+    // Clear train descriptors collection
+    void clear();
+
+    // Return true if there are no train descriptors in the collection
+    bool empty() const;
+
+    // Return true if the matcher supports mask in match methods
+    bool isMaskSupported() const;
+
+    // Find one best match for each query descriptor
+    void matchSingle(const GpuMat& query, const GpuMat& train,
+        GpuMat& trainIdx, GpuMat& distance,
+        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+
+    // Download trainIdx and distance and convert it to CPU vector with DMatch
+    static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
+    // Convert trainIdx and distance to vector with DMatch
+    static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
+
+    // Find one best match for each query descriptor
+    void match(const GpuMat& query, const GpuMat& train, std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());
+
+    // Make gpu collection of trains and masks in suitable format for matchCollection function
+    void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+
+    // Find one best match from train collection for each query descriptor
+    void matchCollection(const GpuMat& query, const GpuMat& trainCollection,
+        GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
+        const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null());
+
+    // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
+    static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches);
+    // Convert trainIdx, imgIdx and distance to vector with DMatch
+    static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);
+
+    // Find one best match from train collection for each query descriptor.
+    void match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+
+    // Find k best matches for each query descriptor (in increasing order of distances)
+    void knnMatchSingle(const GpuMat& query, const GpuMat& train,
+        GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
+        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+
+    // Download trainIdx and distance and convert it to vector with DMatch
+    // compactResult is used when mask is not empty. If compactResult is false matches
+    // vector will have the same size as queryDescriptors rows. If compactResult is true
+    // matches vector will not contain matches for fully masked out query descriptors.
+    static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
+        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+    // Convert trainIdx and distance to vector with DMatch
+    static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
+        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+
+    // Find k best matches for each query descriptor (in increasing order of distances).
+    // compactResult is used when mask is not empty. If compactResult is false matches
+    // vector will have the same size as queryDescriptors rows. If compactResult is true
+    // matches vector will not contain matches for fully masked out query descriptors.
+    void knnMatch(const GpuMat& query, const GpuMat& train,
+        std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask = GpuMat(),
+        bool compactResult = false);
+
+    // Find k best matches from train collection for each query descriptor (in increasing order of distances)
+    void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
+        GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
+        const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null());
+
+    // Download trainIdx and distance and convert it to vector with DMatch
+    // compactResult is used when mask is not empty. If compactResult is false matches
+    // vector will have the same size as queryDescriptors rows. If compactResult is true
+    // matches vector will not contain matches for fully masked out query descriptors.
+    static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
+        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+    // Convert trainIdx and distance to vector with DMatch
+    static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
+        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+
+    // Find k best matches  for each query descriptor (in increasing order of distances).
+    // compactResult is used when mask is not empty. If compactResult is false matches
+    // vector will have the same size as queryDescriptors rows. If compactResult is true
+    // matches vector will not contain matches for fully masked out query descriptors.
+    void knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
+        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
+
+    // Find best matches for each query descriptor which have distance less than maxDistance.
+    // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
+    // Note: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
+    // because it didn't have enough memory.
+    // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
+    // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
+    // Matches are not sorted.
+    void radiusMatchSingle(const GpuMat& query, const GpuMat& train,
+        GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
+        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+
+    // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
+    // matches will be sorted in increasing order of distances.
+    // compactResult is used when mask is not empty. If compactResult is false matches
+    // vector will have the same size as queryDescriptors rows. If compactResult is true
+    // matches vector will not contain matches for fully masked out query descriptors.
+    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
+        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+    // Convert trainIdx, nMatches and distance to vector with DMatch.
+    static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
+        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+
+    // Find best matches for each query descriptor which have distance less than maxDistance
+    // (in increasing order of distances).
+    void radiusMatch(const GpuMat& query, const GpuMat& train,
+        std::vector< std::vector<DMatch> >& matches, float maxDistance,
+        const GpuMat& mask = GpuMat(), bool compactResult = false);
+
+    // Find best matches for each query descriptor which have distance less than maxDistance.
+    // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
+    // otherwise the user can pass preallocated trainIdx and distance with size nQuery x nMaxMatches.
+    // Matches are not sorted.
+    void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
+        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), Stream& stream = Stream::Null());
+
+    // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
+    // matches will be sorted in increasing order of distances.
+    // compactResult is used when mask is not empty. If compactResult is false matches
+    // vector will have the same size as queryDescriptors rows. If compactResult is true
+    // matches vector will not contain matches for fully masked out query descriptors.
+    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
+        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+    // Convert trainIdx, imgIdx, nMatches and distance to vector with DMatch.
+    static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
+        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+
+    // Find best matches from train collection for each query descriptor which have distance less than
+    // maxDistance (in increasing order of distances).
+    void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
+        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
+
+    int norm;
+
+private:
+    std::vector<GpuMat> trainDescCollection;
+};
+
+class CV_EXPORTS FAST_GPU
+{
+public:
+    enum // NOTE(review): presumably row indices of the keypoints GpuMat - confirm in fast.cpp
+    {
+        LOCATION_ROW = 0,
+        RESPONSE_ROW,
+        ROWS_COUNT
+    };
+
+    // all features have the same size
+    static const int FEATURE_SIZE = 7;
+
+    explicit FAST_GPU(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05);
+
+    //! finds the keypoints using the FAST detector
+    //! supports only CV_8UC1 images
+    void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
+    void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
+
+    //! downloads keypoints from device to host memory
+    static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
+
+    //! converts a host-side keypoints matrix to a KeyPoint vector
+    static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
+
+    //! releases the memory of the temporary buffers
+    void release();
+
+    bool nonmaxSupression;
+
+    int threshold;
+
+    //! max keypoints = keypointsRatio * img.size().area()
+    double keypointsRatio;
+
+    //! finds keypoints and computes their responses if nonmaxSupression is true
+    //! returns the count of detected keypoints
+    int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);
+
+    //! gets the final array of keypoints
+    //! performs non-max suppression if needed
+    //! returns the final count of keypoints
+    int getKeyPoints(GpuMat& keypoints);
+
+private:
+    GpuMat kpLoc_;
+    int count_;
+
+    GpuMat score_;
+
+    GpuMat d_keypoints_;
+};
+
+class CV_EXPORTS ORB_GPU
+{
+public:
+    enum // NOTE(review): presumably row indices of the keypoints GpuMat - confirm in orb.cpp
+    {
+        X_ROW = 0,
+        Y_ROW,
+        RESPONSE_ROW,
+        ANGLE_ROW,
+        OCTAVE_ROW,
+        SIZE_ROW,
+        ROWS_COUNT
+    };
+
+    enum
+    {
+        DEFAULT_FAST_THRESHOLD = 20
+    };
+
+    //! Constructor (every parameter has a default value)
+    explicit ORB_GPU(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
+                     int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
+
+    //! Computes the ORB features on an image
+    //! image - the image to compute the features on (supports only CV_8UC1 images)
+    //! mask - the mask to apply
+    //! keypoints - the resulting keypoints (as a host vector or as a GpuMat)
+    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
+    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
+
+    //! Computes the ORB features and descriptors on an image
+    //! image - the image to compute the features on (supports only CV_8UC1 images)
+    //! mask - the mask to apply
+    //! keypoints - the resulting keypoints (as a host vector or as a GpuMat)
+    //! descriptors - the resulting descriptors array
+    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
+    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
+
+    //! downloads keypoints from device to host memory
+    static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
+    //! converts a keypoints matrix to a KeyPoint vector (the argument is a host Mat despite its d_ prefix)
+    static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
+
+    //! returns the descriptor size in bytes (the private constant kBytes)
+    inline int descriptorSize() const { return kBytes; }
+    //! sets threshold and nonmaxSupression of the internal FAST detector (fastDetector_)
+    inline void setFastParams(int threshold, bool nonmaxSupression = true)
+    {
+        fastDetector_.threshold = threshold;
+        fastDetector_.nonmaxSupression = nonmaxSupression;
+    }
+
+    //! releases the memory of the temporary buffers
+    void release();
+
+    //! if true, the image will be blurred before descriptor calculation
+    bool blurForDescriptor;
+
+private:
+    enum { kBytes = 32 };
+
+    void buildScalePyramids(const GpuMat& image, const GpuMat& mask);
+
+    void computeKeyPointsPyramid();
+
+    void computeDescriptors(GpuMat& descriptors);
+
+    void mergeKeyPoints(GpuMat& keypoints);
+
+    int nFeatures_;
+    float scaleFactor_;
+    int nLevels_;
+    int edgeThreshold_;
+    int firstLevel_;
+    int WTA_K_;
+    int scoreType_;
+    int patchSize_;
+
+    // The number of desired features per scale (one entry per pyramid level, judging by the name)
+    std::vector<size_t> n_features_per_level_;
+
+    // Points to compute BRIEF descriptors from
+    GpuMat pattern_;
+
+    std::vector<GpuMat> imagePyr_;
+    std::vector<GpuMat> maskPyr_;
+
+    GpuMat buf_;
+
+    std::vector<GpuMat> keyPointsPyr_;
+    std::vector<int> keyPointsCount_;
+
+    FAST_GPU fastDetector_;
+
+    Ptr<FilterEngine_GPU> blurFilter;
+
+    GpuMat d_keypoints_;
+};
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUFEATURES2D_HPP__ */
diff --git a/modules/gpu/perf/perf_features2d.cpp b/modules/gpufeatures2d/perf/perf_features2d.cpp
similarity index 97%
rename from modules/gpu/perf/perf_features2d.cpp
rename to modules/gpufeatures2d/perf/perf_features2d.cpp
index feee3a939..9396ba290 100644
--- a/modules/gpu/perf/perf_features2d.cpp
+++ b/modules/gpufeatures2d/perf/perf_features2d.cpp
@@ -51,7 +51,7 @@ using namespace perf;
 
 DEF_PARAM_TEST(Image_Threshold_NonMaxSupression, string, int, bool);
 
-PERF_TEST_P(Image_Threshold_NonMaxSupression, Features2D_FAST,
+PERF_TEST_P(Image_Threshold_NonMaxSupression, FAST,
             Combine(Values<string>("gpu/perf/aloe.png"),
                     Values(20),
                     Bool()))
@@ -93,7 +93,7 @@ PERF_TEST_P(Image_Threshold_NonMaxSupression, Features2D_FAST,
 
 DEF_PARAM_TEST(Image_NFeatures, string, int);
 
-PERF_TEST_P(Image_NFeatures, Features2D_ORB,
+PERF_TEST_P(Image_NFeatures, ORB,
             Combine(Values<string>("gpu/perf/aloe.png"),
                     Values(4000)))
 {
@@ -145,7 +145,7 @@ PERF_TEST_P(Image_NFeatures, Features2D_ORB,
 
 DEF_PARAM_TEST(DescSize_Norm, int, NormType);
 
-PERF_TEST_P(DescSize_Norm, Features2D_BFMatch,
+PERF_TEST_P(DescSize_Norm, BFMatch,
             Combine(Values(64, 128, 256),
                     Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
 {
@@ -202,7 +202,7 @@ static void toOneRowMatches(const std::vector< std::vector<cv::DMatch> >& src, s
 
 DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
 
-PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch,
+PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,
             Combine(Values(64, 128, 256),
                     Values(2, 3),
                     Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
@@ -257,7 +257,7 @@ PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch,
 //////////////////////////////////////////////////////////////////////
 // BFRadiusMatch
 
-PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch,
+PERF_TEST_P(DescSize_Norm, BFRadiusMatch,
             Combine(Values(64, 128, 256),
                     Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
 {
diff --git a/modules/gpufeatures2d/perf/perf_main.cpp b/modules/gpufeatures2d/perf/perf_main.cpp
new file mode 100644
index 000000000..0fd79fde3
--- /dev/null
+++ b/modules/gpufeatures2d/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+// NOTE(review): presumably expands to the perf-test entry point for gpufeatures2d, with printCudaInfo() as extra start-up code - confirm in opencv2/ts.
+CV_PERF_TEST_MAIN(gpufeatures2d, printCudaInfo())
diff --git a/modules/gpufeatures2d/perf/perf_precomp.cpp b/modules/gpufeatures2d/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpufeatures2d/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpufeatures2d/perf/perf_precomp.hpp b/modules/gpufeatures2d/perf/perf_precomp.hpp
new file mode 100644
index 000000000..4f767c4fb
--- /dev/null
+++ b/modules/gpufeatures2d/perf/perf_precomp.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpufeatures2d.hpp"
+#include "opencv2/features2d.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpufeatures2d/src/brute_force_matcher.cpp
similarity index 100%
rename from modules/gpu/src/brute_force_matcher.cpp
rename to modules/gpufeatures2d/src/brute_force_matcher.cpp
diff --git a/modules/gpu/src/cuda/bf_knnmatch.cu b/modules/gpufeatures2d/src/cuda/bf_knnmatch.cu
similarity index 100%
rename from modules/gpu/src/cuda/bf_knnmatch.cu
rename to modules/gpufeatures2d/src/cuda/bf_knnmatch.cu
diff --git a/modules/gpu/src/cuda/bf_match.cu b/modules/gpufeatures2d/src/cuda/bf_match.cu
similarity index 100%
rename from modules/gpu/src/cuda/bf_match.cu
rename to modules/gpufeatures2d/src/cuda/bf_match.cu
diff --git a/modules/gpu/src/cuda/bf_radius_match.cu b/modules/gpufeatures2d/src/cuda/bf_radius_match.cu
similarity index 100%
rename from modules/gpu/src/cuda/bf_radius_match.cu
rename to modules/gpufeatures2d/src/cuda/bf_radius_match.cu
diff --git a/modules/gpu/src/cuda/fast.cu b/modules/gpufeatures2d/src/cuda/fast.cu
similarity index 100%
rename from modules/gpu/src/cuda/fast.cu
rename to modules/gpufeatures2d/src/cuda/fast.cu
diff --git a/modules/gpu/src/cuda/orb.cu b/modules/gpufeatures2d/src/cuda/orb.cu
similarity index 100%
rename from modules/gpu/src/cuda/orb.cu
rename to modules/gpufeatures2d/src/cuda/orb.cu
diff --git a/modules/gpu/src/fast.cpp b/modules/gpufeatures2d/src/fast.cpp
similarity index 100%
rename from modules/gpu/src/fast.cpp
rename to modules/gpufeatures2d/src/fast.cpp
diff --git a/modules/gpu/src/orb.cpp b/modules/gpufeatures2d/src/orb.cpp
similarity index 98%
rename from modules/gpu/src/orb.cpp
rename to modules/gpufeatures2d/src/orb.cpp
index 2d40416f6..495ca3f6e 100644
--- a/modules/gpu/src/orb.cpp
+++ b/modules/gpufeatures2d/src/orb.cpp
@@ -504,19 +504,19 @@ void cv::gpu::ORB_GPU::buildScalePyramids(const GpuMat& image, const GpuMat& mas
         {
             if (level < firstLevel_)
             {
-                resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+                gpu::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
 
                 if (!mask.empty())
-                    resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
+                    gpu::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
             }
             else
             {
-                resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+                gpu::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
 
                 if (!mask.empty())
                 {
-                    resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
-                    threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
+                    gpu::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
+                    gpu::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
                 }
             }
         }
@@ -534,7 +534,7 @@ void cv::gpu::ORB_GPU::buildScalePyramids(const GpuMat& image, const GpuMat& mas
         Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
         buf_(inner).setTo(Scalar::all(255));
 
-        bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
+        gpu::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
     }
 }
 
diff --git a/modules/gpufeatures2d/src/precomp.cpp b/modules/gpufeatures2d/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpufeatures2d/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpufeatures2d/src/precomp.hpp b/modules/gpufeatures2d/src/precomp.hpp
new file mode 100644
index 000000000..d3936264b
--- /dev/null
+++ b/modules/gpufeatures2d/src/precomp.hpp
@@ -0,0 +1,58 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <algorithm>
+#include <functional>
+#include <iterator>
+
+#include "opencv2/gpufeatures2d.hpp"
+#include "opencv2/gpuarithm.hpp"
+#include "opencv2/gpuimgproc.hpp"
+
+#include "opencv2/features2d.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpufeatures2d/test/test_features2d.cpp
similarity index 100%
rename from modules/gpu/test/test_features2d.cpp
rename to modules/gpufeatures2d/test/test_features2d.cpp
diff --git a/modules/gpufeatures2d/test/test_main.cpp b/modules/gpufeatures2d/test/test_main.cpp
new file mode 100644
index 000000000..eea3d7c00
--- /dev/null
+++ b/modules/gpufeatures2d/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+// NOTE(review): "gpu" is presumably the test-data subdirectory (this module's perf tests also load gpu/perf/aloe.png) - confirm it should stay "gpu" after the module split.
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpufeatures2d/test/test_precomp.cpp b/modules/gpufeatures2d/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpufeatures2d/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpufeatures2d/test/test_precomp.hpp b/modules/gpufeatures2d/test/test_precomp.hpp
new file mode 100644
index 000000000..7725d3f3d
--- /dev/null
+++ b/modules/gpufeatures2d/test/test_precomp.hpp
@@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpufeatures2d.hpp"
+#include "opencv2/features2d.hpp"
+
+#endif
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index bb444f070..06ccb4da0 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -20,6 +20,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuarithm/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufilters/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 57fdeb093..f1382269a 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc)
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc opencv_gpufeatures2d)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 

From fc1fa285565dc393c867096a32d8296ea3d80f3c Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 10 Apr 2013 10:59:25 +0400
Subject: [PATCH 16/49] gpuvideo module for video processing

---
 modules/gpu/CMakeLists.txt                    |   4 +-
 modules/gpu/doc/gpu.rst                       |   1 -
 modules/gpu/include/opencv2/gpu.hpp           | 521 +---------------
 .../gpunvidia/include/opencv2/gpunvidia.hpp   |   1 -
 modules/gpuvideo/CMakeLists.txt               |   9 +
 modules/gpuvideo/doc/gpuvideo.rst             |   8 +
 modules/{gpu => gpuvideo}/doc/video.rst       |   0
 modules/gpuvideo/include/opencv2/gpuvideo.hpp | 586 ++++++++++++++++++
 .../opencv2/gpuvideo}/NCVBroxOpticalFlow.hpp  |   0
 modules/gpuvideo/perf/perf_main.cpp           |  47 ++
 modules/gpuvideo/perf/perf_precomp.cpp        |  43 ++
 modules/gpuvideo/perf/perf_precomp.hpp        |  67 ++
 modules/{gpu => gpuvideo}/perf/perf_video.cpp |   0
 modules/{gpu => gpuvideo}/src/bgfg_gmg.cpp    |   0
 modules/{gpu => gpuvideo}/src/bgfg_mog.cpp    |   0
 .../src/cuda/NCVBroxOpticalFlow.cu            |   2 +-
 .../{gpu => gpuvideo}/src/cuda/bgfg_gmg.cu    |   0
 .../{gpu => gpuvideo}/src/cuda/bgfg_mog.cu    |   0
 .../{gpu => gpuvideo}/src/cuda/fgd_bgfg.cu    |   0
 .../src/cuda/fgd_bgfg_common.hpp              |   0
 .../{gpu => gpuvideo}/src/cuda/optflowbm.cu   |   0
 .../src/cuda/optical_flow.cu                  |   0
 .../src/cuda/optical_flow_farneback.cu        |   0
 modules/{gpu => gpuvideo}/src/cuda/pyrlk.cu   |   0
 .../{gpu => gpuvideo}/src/cuda/tvl1flow.cu    |   0
 modules/{gpu => gpuvideo}/src/fgd_bgfg.cpp    |   0
 modules/{gpu => gpuvideo}/src/optflowbm.cpp   |   0
 .../{gpu => gpuvideo}/src/optical_flow.cpp    |   0
 .../src/optical_flow_farneback.cpp            |   0
 modules/gpuvideo/src/precomp.cpp              |  43 ++
 modules/gpuvideo/src/precomp.hpp              |  69 +++
 modules/{gpu => gpuvideo}/src/pyrlk.cpp       |   0
 modules/{gpu => gpuvideo}/src/tvl1flow.cpp    |   0
 modules/{gpu => gpuvideo}/test/test_bgfg.cpp  |   0
 modules/gpuvideo/test/test_main.cpp           |  45 ++
 .../{gpu => gpuvideo}/test/test_optflow.cpp   |   0
 modules/gpuvideo/test/test_precomp.cpp        |  43 ++
 modules/gpuvideo/test/test_precomp.hpp        |  65 ++
 samples/cpp/CMakeLists.txt                    |   1 +
 samples/gpu/CMakeLists.txt                    |   2 +-
 samples/gpu/opticalflow_nvidia_api.cpp        |   1 +
 41 files changed, 1033 insertions(+), 525 deletions(-)
 create mode 100644 modules/gpuvideo/CMakeLists.txt
 create mode 100644 modules/gpuvideo/doc/gpuvideo.rst
 rename modules/{gpu => gpuvideo}/doc/video.rst (100%)
 create mode 100644 modules/gpuvideo/include/opencv2/gpuvideo.hpp
 rename modules/{gpunvidia/include/opencv2/gpunvidia => gpuvideo/include/opencv2/gpuvideo}/NCVBroxOpticalFlow.hpp (100%)
 create mode 100644 modules/gpuvideo/perf/perf_main.cpp
 create mode 100644 modules/gpuvideo/perf/perf_precomp.cpp
 create mode 100644 modules/gpuvideo/perf/perf_precomp.hpp
 rename modules/{gpu => gpuvideo}/perf/perf_video.cpp (100%)
 rename modules/{gpu => gpuvideo}/src/bgfg_gmg.cpp (100%)
 rename modules/{gpu => gpuvideo}/src/bgfg_mog.cpp (100%)
 rename modules/{gpunvidia => gpuvideo}/src/cuda/NCVBroxOpticalFlow.cu (99%)
 rename modules/{gpu => gpuvideo}/src/cuda/bgfg_gmg.cu (100%)
 rename modules/{gpu => gpuvideo}/src/cuda/bgfg_mog.cu (100%)
 rename modules/{gpu => gpuvideo}/src/cuda/fgd_bgfg.cu (100%)
 rename modules/{gpu => gpuvideo}/src/cuda/fgd_bgfg_common.hpp (100%)
 rename modules/{gpu => gpuvideo}/src/cuda/optflowbm.cu (100%)
 rename modules/{gpu => gpuvideo}/src/cuda/optical_flow.cu (100%)
 rename modules/{gpu => gpuvideo}/src/cuda/optical_flow_farneback.cu (100%)
 rename modules/{gpu => gpuvideo}/src/cuda/pyrlk.cu (100%)
 rename modules/{gpu => gpuvideo}/src/cuda/tvl1flow.cu (100%)
 rename modules/{gpu => gpuvideo}/src/fgd_bgfg.cpp (100%)
 rename modules/{gpu => gpuvideo}/src/optflowbm.cpp (100%)
 rename modules/{gpu => gpuvideo}/src/optical_flow.cpp (100%)
 rename modules/{gpu => gpuvideo}/src/optical_flow_farneback.cpp (100%)
 create mode 100644 modules/gpuvideo/src/precomp.cpp
 create mode 100644 modules/gpuvideo/src/precomp.hpp
 rename modules/{gpu => gpuvideo}/src/pyrlk.cpp (100%)
 rename modules/{gpu => gpuvideo}/src/tvl1flow.cpp (100%)
 rename modules/{gpu => gpuvideo}/test/test_bgfg.cpp (100%)
 create mode 100644 modules/gpuvideo/test/test_main.cpp
 rename modules/{gpu => gpuvideo}/test/test_optflow.cpp (100%)
 create mode 100644 modules/gpuvideo/test/test_precomp.cpp
 create mode 100644 modules/gpuvideo/test/test_precomp.hpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index fffc9bccc..296545add 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -4,7 +4,9 @@ endif()
 
 set(the_description "GPU-accelerated Computer Vision")
 
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d OPTIONAL opencv_gpunvidia)
+ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy
+                   opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo
+                   OPTIONAL opencv_gpunvidia)
 
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
 
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index 68d7fc21e..3803efd19 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -10,4 +10,3 @@ gpu. GPU-accelerated Computer Vision
     data_structures
     object_detection
     camera_calibration_and_3d_reconstruction
-    video
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index db8e224e9..524a32fc9 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -54,6 +54,7 @@
 #include "opencv2/gpufilters.hpp"
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpufeatures2d.hpp"
+#include "opencv2/gpuvideo.hpp"
 
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect.hpp"
@@ -433,543 +434,23 @@ private:
 
 
 
-////////////////////////////////// Optical Flow //////////////////////////////////////////
 
-class CV_EXPORTS BroxOpticalFlow
-{
-public:
-    BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
-        alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
-        inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
-    {
-    }
 
-    //! Compute optical flow
-    //! frame0 - source frame (supports only CV_32FC1 type)
-    //! frame1 - frame to track (with the same size and type as frame0)
-    //! u      - flow horizontal component (along x axis)
-    //! v      - flow vertical component (along y axis)
-    void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
 
-    //! flow smoothness
-    float alpha;
 
-    //! gradient constancy importance
-    float gamma;
 
-    //! pyramid scale factor
-    float scale_factor;
 
-    //! number of lagged non-linearity iterations (inner loop)
-    int inner_iterations;
 
-    //! number of warping iterations (number of pyramid levels)
-    int outer_iterations;
 
-    //! number of linear system solver iterations
-    int solver_iterations;
 
-    GpuMat buf;
-};
 
 
 
 
-class CV_EXPORTS PyrLKOpticalFlow
-{
-public:
-    PyrLKOpticalFlow();
 
-    void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
-        GpuMat& status, GpuMat* err = 0);
 
-    void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
 
-    void releaseMemory();
 
-    Size winSize;
-    int maxLevel;
-    int iters;
-    bool useInitialFlow;
-
-private:
-    std::vector<GpuMat> prevPyr_;
-    std::vector<GpuMat> nextPyr_;
-
-    GpuMat buf_;
-
-    GpuMat uPyr_[2];
-    GpuMat vPyr_[2];
-};
-
-
-class CV_EXPORTS FarnebackOpticalFlow
-{
-public:
-    FarnebackOpticalFlow()
-    {
-        numLevels = 5;
-        pyrScale = 0.5;
-        fastPyramids = false;
-        winSize = 13;
-        numIters = 10;
-        polyN = 5;
-        polySigma = 1.1;
-        flags = 0;
-    }
-
-    int numLevels;
-    double pyrScale;
-    bool fastPyramids;
-    int winSize;
-    int numIters;
-    int polyN;
-    double polySigma;
-    int flags;
-
-    void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
-
-    void releaseMemory()
-    {
-        frames_[0].release();
-        frames_[1].release();
-        pyrLevel_[0].release();
-        pyrLevel_[1].release();
-        M_.release();
-        bufM_.release();
-        R_[0].release();
-        R_[1].release();
-        blurredFrame_[0].release();
-        blurredFrame_[1].release();
-        pyramid0_.clear();
-        pyramid1_.clear();
-    }
-
-private:
-    void prepareGaussian(
-            int n, double sigma, float *g, float *xg, float *xxg,
-            double &ig11, double &ig03, double &ig33, double &ig55);
-
-    void setPolynomialExpansionConsts(int n, double sigma);
-
-    void updateFlow_boxFilter(
-            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
-            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
-
-    void updateFlow_gaussianBlur(
-            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
-            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
-
-    GpuMat frames_[2];
-    GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
-    std::vector<GpuMat> pyramid0_, pyramid1_;
-};
-
-
-// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
-//
-// see reference:
-//   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
-//   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
-class CV_EXPORTS OpticalFlowDual_TVL1_GPU
-{
-public:
-    OpticalFlowDual_TVL1_GPU();
-
-    void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
-
-    void collectGarbage();
-
-    /**
-     * Time step of the numerical scheme.
-     */
-    double tau;
-
-    /**
-     * Weight parameter for the data term, attachment parameter.
-     * This is the most relevant parameter, which determines the smoothness of the output.
-     * The smaller this parameter is, the smoother the solutions we obtain.
-     * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
-     */
-    double lambda;
-
-    /**
-     * Weight parameter for (u - v)^2, tightness parameter.
-     * It serves as a link between the attachment and the regularization terms.
-     * In theory, it should have a small value in order to maintain both parts in correspondence.
-     * The method is stable for a large range of values of this parameter.
-     */
-    double theta;
-
-    /**
-     * Number of scales used to create the pyramid of images.
-     */
-    int nscales;
-
-    /**
-     * Number of warpings per scale.
-     * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
-     * This is a parameter that assures the stability of the method.
-     * It also affects the running time, so it is a compromise between speed and accuracy.
-     */
-    int warps;
-
-    /**
-     * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
-     * A small value will yield more accurate solutions at the expense of a slower convergence.
-     */
-    double epsilon;
-
-    /**
-     * Stopping criterion iterations number used in the numerical scheme.
-     */
-    int iterations;
-
-    double scaleStep;
-
-    bool useInitialFlow;
-
-private:
-    void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2);
-
-    std::vector<GpuMat> I0s;
-    std::vector<GpuMat> I1s;
-    std::vector<GpuMat> u1s;
-    std::vector<GpuMat> u2s;
-
-    GpuMat I1x_buf;
-    GpuMat I1y_buf;
-
-    GpuMat I1w_buf;
-    GpuMat I1wx_buf;
-    GpuMat I1wy_buf;
-
-    GpuMat grad_buf;
-    GpuMat rho_c_buf;
-
-    GpuMat p11_buf;
-    GpuMat p12_buf;
-    GpuMat p21_buf;
-    GpuMat p22_buf;
-
-    GpuMat diff_buf;
-    GpuMat norm_buf;
-};
-
-
-//! Calculates optical flow for 2 images using block matching algorithm */
-CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr,
-                                  Size block_size, Size shift_size, Size max_range, bool use_previous,
-                                  GpuMat& velx, GpuMat& vely, GpuMat& buf,
-                                  Stream& stream = Stream::Null());
-
-class CV_EXPORTS FastOpticalFlowBM
-{
-public:
-    void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null());
-
-private:
-    GpuMat buffer;
-    GpuMat extended_I0;
-    GpuMat extended_I1;
-};
-
-
-//! Interpolate frames (images) using provided optical flow (displacement field).
-//! frame0   - frame 0 (32-bit floating point images, single channel)
-//! frame1   - frame 1 (the same type and size)
-//! fu       - forward horizontal displacement
-//! fv       - forward vertical displacement
-//! bu       - backward horizontal displacement
-//! bv       - backward vertical displacement
-//! pos      - new frame position
-//! newFrame - new frame
-//! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat;
-//!            occlusion masks            0, occlusion masks            1,
-//!            interpolated forward flow  0, interpolated forward flow  1,
-//!            interpolated backward flow 0, interpolated backward flow 1
-//!
-CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
-                                  const GpuMat& fu, const GpuMat& fv,
-                                  const GpuMat& bu, const GpuMat& bv,
-                                  float pos, GpuMat& newFrame, GpuMat& buf,
-                                  Stream& stream = Stream::Null());
-
-CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
-
-
-//////////////////////// Background/foreground segmentation ////////////////////////
-
-// Foreground Object Detection from Videos Containing Complex Background.
-// Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian.
-// ACM MM2003 9p
-class CV_EXPORTS FGDStatModel
-{
-public:
-    struct CV_EXPORTS Params
-    {
-        int Lc;  // Quantized levels per 'color' component. Power of two, typically 32, 64 or 128.
-        int N1c; // Number of color vectors used to model normal background color variation at a given pixel.
-        int N2c; // Number of color vectors retained at given pixel.  Must be > N1c, typically ~ 5/3 of N1c.
-        // Used to allow the first N1c vectors to adapt over time to changing background.
-
-        int Lcc;  // Quantized levels per 'color co-occurrence' component.  Power of two, typically 16, 32 or 64.
-        int N1cc; // Number of color co-occurrence vectors used to model normal background color variation at a given pixel.
-        int N2cc; // Number of color co-occurrence vectors retained at given pixel.  Must be > N1cc, typically ~ 5/3 of N1cc.
-        // Used to allow the first N1cc vectors to adapt over time to changing background.
-
-        bool is_obj_without_holes; // If TRUE we ignore holes within foreground blobs. Defaults to TRUE.
-        int perform_morphing;     // Number of erode-dilate-erode foreground-blob cleanup iterations.
-        // These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1.
-
-        float alpha1; // How quickly we forget old background pixel values seen. Typically set to 0.1.
-        float alpha2; // "Controls speed of feature learning". Depends on T. Typical value circa 0.005.
-        float alpha3; // Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1.
-
-        float delta;   // Affects color and color co-occurrence quantization, typically set to 2.
-        float T;       // A percentage value which determines when new features can be recognized as new background. (Typically 0.9).
-        float minArea; // Discard foreground blobs whose bounding box is smaller than this threshold.
-
-        // default Params
-        Params();
-    };
-
-    // out_cn - channels count in output result (can be 3 or 4)
-    // 4-channels require more memory, but a bit faster
-    explicit FGDStatModel(int out_cn = 3);
-    explicit FGDStatModel(const cv::gpu::GpuMat& firstFrame, const Params& params = Params(), int out_cn = 3);
-
-    ~FGDStatModel();
-
-    void create(const cv::gpu::GpuMat& firstFrame, const Params& params = Params());
-    void release();
-
-    int update(const cv::gpu::GpuMat& curFrame);
-
-    //8UC3 or 8UC4 reference background image
-    cv::gpu::GpuMat background;
-
-    //8UC1 foreground image
-    cv::gpu::GpuMat foreground;
-
-    std::vector< std::vector<cv::Point> > foreground_regions;
-
-private:
-    FGDStatModel(const FGDStatModel&);
-    FGDStatModel& operator=(const FGDStatModel&);
-
-    class Impl;
-    std::auto_ptr<Impl> impl_;
-};
-
-/*!
- Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm
-
- The class implements the following algorithm:
- "An improved adaptive background mixture model for real-time tracking with shadow detection"
- P. KadewTraKuPong and R. Bowden,
- Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001."
- http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
-*/
-class CV_EXPORTS MOG_GPU
-{
-public:
-    //! the default constructor
-    MOG_GPU(int nmixtures = -1);
-
-    //! re-initiaization method
-    void initialize(Size frameSize, int frameType);
-
-    //! the update operator
-    void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = 0.0f, Stream& stream = Stream::Null());
-
-    //! computes a background image which are the mean of all background gaussians
-    void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
-
-    //! releases all inner buffers
-    void release();
-
-    int history;
-    float varThreshold;
-    float backgroundRatio;
-    float noiseSigma;
-
-private:
-    int nmixtures_;
-
-    Size frameSize_;
-    int frameType_;
-    int nframes_;
-
-    GpuMat weight_;
-    GpuMat sortKey_;
-    GpuMat mean_;
-    GpuMat var_;
-};
-
-/*!
- The class implements the following algorithm:
- "Improved adaptive Gausian mixture model for background subtraction"
- Z.Zivkovic
- International Conference Pattern Recognition, UK, August, 2004.
- http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
-*/
-class CV_EXPORTS MOG2_GPU
-{
-public:
-    //! the default constructor
-    MOG2_GPU(int nmixtures = -1);
-
-    //! re-initiaization method
-    void initialize(Size frameSize, int frameType);
-
-    //! the update operator
-    void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
-
-    //! computes a background image which are the mean of all background gaussians
-    void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
-
-    //! releases all inner buffers
-    void release();
-
-    // parameters
-    // you should call initialize after parameters changes
-
-    int history;
-
-    //! here it is the maximum allowed number of mixture components.
-    //! Actual number is determined dynamically per pixel
-    float varThreshold;
-    // threshold on the squared Mahalanobis distance to decide if it is well described
-    // by the background model or not. Related to Cthr from the paper.
-    // This does not influence the update of the background. A typical value could be 4 sigma
-    // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
-
-    /////////////////////////
-    // less important parameters - things you might change but be carefull
-    ////////////////////////
-
-    float backgroundRatio;
-    // corresponds to fTB=1-cf from the paper
-    // TB - threshold when the component becomes significant enough to be included into
-    // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
-    // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
-    // it is considered foreground
-    // float noiseSigma;
-    float varThresholdGen;
-
-    //correspondts to Tg - threshold on the squared Mahalan. dist. to decide
-    //when a sample is close to the existing components. If it is not close
-    //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
-    //Smaller Tg leads to more generated components and higher Tg might make
-    //lead to small number of components but they can grow too large
-    float fVarInit;
-    float fVarMin;
-    float fVarMax;
-
-    //initial variance  for the newly generated components.
-    //It will will influence the speed of adaptation. A good guess should be made.
-    //A simple way is to estimate the typical standard deviation from the images.
-    //I used here 10 as a reasonable value
-    // min and max can be used to further control the variance
-    float fCT; //CT - complexity reduction prior
-    //this is related to the number of samples needed to accept that a component
-    //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
-    //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
-
-    //shadow detection parameters
-    bool bShadowDetection; //default 1 - do shadow detection
-    unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
-    float fTau;
-    // Tau - shadow threshold. The shadow is detected if the pixel is darker
-    //version of the background. Tau is a threshold on how much darker the shadow can be.
-    //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
-    //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
-
-private:
-    int nmixtures_;
-
-    Size frameSize_;
-    int frameType_;
-    int nframes_;
-
-    GpuMat weight_;
-    GpuMat variance_;
-    GpuMat mean_;
-
-    GpuMat bgmodelUsedModes_; //keep track of number of modes per pixel
-};
-
-/**
- * Background Subtractor module. Takes a series of images and returns a sequence of mask (8UC1)
- * images of the same size, where 255 indicates Foreground and 0 represents Background.
- * This class implements an algorithm described in "Visual Tracking of Human Visitors under
- * Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere,
- * A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012.
- */
-class CV_EXPORTS GMG_GPU
-{
-public:
-    GMG_GPU();
-
-    /**
-     * Validate parameters and set up data structures for appropriate frame size.
-     * @param frameSize Input frame size
-     * @param min       Minimum value taken on by pixels in image sequence. Usually 0
-     * @param max       Maximum value taken on by pixels in image sequence. e.g. 1.0 or 255
-     */
-    void initialize(Size frameSize, float min = 0.0f, float max = 255.0f);
-
-    /**
-     * Performs single-frame background subtraction and builds up a statistical background image
-     * model.
-     * @param frame        Input frame
-     * @param fgmask       Output mask image representing foreground and background pixels
-     * @param stream       Stream for the asynchronous version
-     */
-    void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
-
-    //! Releases all inner buffers
-    void release();
-
-    //! Total number of distinct colors to maintain in histogram.
-    int maxFeatures;
-
-    //! Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms.
-    float learningRate;
-
-    //! Number of frames of video to use to initialize histograms.
-    int numInitializationFrames;
-
-    //! Number of discrete levels in each channel to be used in histograms.
-    int quantizationLevels;
-
-    //! Prior probability that any given pixel is a background pixel. A sensitivity parameter.
-    float backgroundPrior;
-
-    //! Value above which pixel is determined to be FG.
-    float decisionThreshold;
-
-    //! Smoothing radius, in pixels, for cleaning up FG image.
-    int smoothingRadius;
-
-    //! Perform background model update.
-    bool updateBackgroundModel;
-
-private:
-    float maxVal_, minVal_;
-
-    Size frameSize_;
-
-    int frameNum_;
-
-    GpuMat nfeatures_;
-    GpuMat colors_;
-    GpuMat weights_;
-
-    Ptr<FilterEngine_GPU> boxFilter_;
-    GpuMat buf_;
-};
 
 //! removes points (CV_32FC2, single row matrix) with zero mask value
 CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia.hpp b/modules/gpunvidia/include/opencv2/gpunvidia.hpp
index c59dc6402..4c07417a8 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia.hpp
@@ -46,7 +46,6 @@
 #include "opencv2/gpunvidia/NCV.hpp"
 #include "opencv2/gpunvidia/NPP_staging.hpp"
 #include "opencv2/gpunvidia/NCVPyramid.hpp"
-#include "opencv2/gpunvidia/NCVBroxOpticalFlow.hpp"
 #include "opencv2/gpunvidia/NCVHaarObjectDetection.hpp"
 
 #endif /* __OPENCV_GPUNVIDIA_HPP__ */
diff --git a/modules/gpuvideo/CMakeLists.txt b/modules/gpuvideo/CMakeLists.txt
new file mode 100644
index 000000000..3e4e4baef
--- /dev/null
+++ b/modules/gpuvideo/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpuvideo)
+endif()
+
+set(the_description "GPU-accelerated Video Analysis")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+
+ocv_define_module(gpuvideo opencv_video opencv_legacy opencv_gpufilters opencv_gpuimgproc OPTIONAL opencv_gpunvidia)
diff --git a/modules/gpuvideo/doc/gpuvideo.rst b/modules/gpuvideo/doc/gpuvideo.rst
new file mode 100644
index 000000000..e16d7c427
--- /dev/null
+++ b/modules/gpuvideo/doc/gpuvideo.rst
@@ -0,0 +1,8 @@
+****************************************
+gpuvideo. GPU-accelerated Video Analysis
+****************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    video
diff --git a/modules/gpu/doc/video.rst b/modules/gpuvideo/doc/video.rst
similarity index 100%
rename from modules/gpu/doc/video.rst
rename to modules/gpuvideo/doc/video.rst
diff --git a/modules/gpuvideo/include/opencv2/gpuvideo.hpp b/modules/gpuvideo/include/opencv2/gpuvideo.hpp
new file mode 100644
index 000000000..de3cf7fa7
--- /dev/null
+++ b/modules/gpuvideo/include/opencv2/gpuvideo.hpp
@@ -0,0 +1,586 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUVIDEO_HPP__
+#define __OPENCV_GPUVIDEO_HPP__
+
+#include <memory>
+
+#include "opencv2/core/gpumat.hpp"
+#include "opencv2/gpufilters.hpp"
+
+namespace cv { namespace gpu {
+
+////////////////////////////////// Optical Flow //////////////////////////////////////////
+
+class CV_EXPORTS BroxOpticalFlow
+{
+public:
+    BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
+        alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
+        inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
+    {
+    }
+
+    //! Compute optical flow
+    //! frame0 - source frame (supports only CV_32FC1 type)
+    //! frame1 - frame to track (with the same size and type as frame0)
+    //! u      - flow horizontal component (along x axis)
+    //! v      - flow vertical component (along y axis)
+    void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
+
+    //! flow smoothness
+    float alpha;
+
+    //! gradient constancy importance
+    float gamma;
+
+    //! pyramid scale factor
+    float scale_factor;
+
+    //! number of lagged non-linearity iterations (inner loop)
+    int inner_iterations;
+
+    //! number of warping iterations (number of pyramid levels)
+    int outer_iterations;
+
+    //! number of linear system solver iterations
+    int solver_iterations;
+
+    GpuMat buf;
+};
+
+class CV_EXPORTS PyrLKOpticalFlow
+{
+public:
+    PyrLKOpticalFlow();
+
+    void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
+        GpuMat& status, GpuMat* err = 0);
+
+    void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
+
+    void releaseMemory();
+
+    Size winSize;
+    int maxLevel;
+    int iters;
+    bool useInitialFlow;
+
+private:
+    std::vector<GpuMat> prevPyr_;
+    std::vector<GpuMat> nextPyr_;
+
+    GpuMat buf_;
+
+    GpuMat uPyr_[2];
+    GpuMat vPyr_[2];
+};
+
+class CV_EXPORTS FarnebackOpticalFlow
+{
+public:
+    FarnebackOpticalFlow()
+    {
+        numLevels = 5;
+        pyrScale = 0.5;
+        fastPyramids = false;
+        winSize = 13;
+        numIters = 10;
+        polyN = 5;
+        polySigma = 1.1;
+        flags = 0;
+    }
+
+    int numLevels;
+    double pyrScale;
+    bool fastPyramids;
+    int winSize;
+    int numIters;
+    int polyN;
+    double polySigma;
+    int flags;
+
+    void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
+
+    void releaseMemory()
+    {
+        frames_[0].release();
+        frames_[1].release();
+        pyrLevel_[0].release();
+        pyrLevel_[1].release();
+        M_.release();
+        bufM_.release();
+        R_[0].release();
+        R_[1].release();
+        blurredFrame_[0].release();
+        blurredFrame_[1].release();
+        pyramid0_.clear();
+        pyramid1_.clear();
+    }
+
+private:
+    void prepareGaussian(
+            int n, double sigma, float *g, float *xg, float *xxg,
+            double &ig11, double &ig03, double &ig33, double &ig55);
+
+    void setPolynomialExpansionConsts(int n, double sigma);
+
+    void updateFlow_boxFilter(
+            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
+            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
+
+    void updateFlow_gaussianBlur(
+            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
+            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
+
+    GpuMat frames_[2];
+    GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
+    std::vector<GpuMat> pyramid0_, pyramid1_;
+};
+
+// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
+//
+// see reference:
+//   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+//   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+class CV_EXPORTS OpticalFlowDual_TVL1_GPU
+{
+public:
+    OpticalFlowDual_TVL1_GPU();
+
+    void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
+
+    void collectGarbage();
+
+    /**
+     * Time step of the numerical scheme.
+     */
+    double tau;
+
+    /**
+     * Weight parameter for the data term, attachment parameter.
+     * This is the most relevant parameter, which determines the smoothness of the output.
+     * The smaller this parameter is, the smoother the solutions we obtain.
+     * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
+     */
+    double lambda;
+
+    /**
+     * Weight parameter for (u - v)^2, tightness parameter.
+     * It serves as a link between the attachment and the regularization terms.
+     * In theory, it should have a small value in order to maintain both parts in correspondence.
+     * The method is stable for a large range of values of this parameter.
+     */
+    double theta;
+
+    /**
+     * Number of scales used to create the pyramid of images.
+     */
+    int nscales;
+
+    /**
+     * Number of warpings per scale.
+     * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
+     * This is a parameter that assures the stability of the method.
+     * It also affects the running time, so it is a compromise between speed and accuracy.
+     */
+    int warps;
+
+    /**
+     * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
+     * A small value will yield more accurate solutions at the expense of a slower convergence.
+     */
+    double epsilon;
+
+    /**
+     * Stopping criterion iterations number used in the numerical scheme.
+     */
+    int iterations;
+
+    double scaleStep;
+
+    bool useInitialFlow;
+
+private:
+    void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2);
+
+    std::vector<GpuMat> I0s;
+    std::vector<GpuMat> I1s;
+    std::vector<GpuMat> u1s;
+    std::vector<GpuMat> u2s;
+
+    GpuMat I1x_buf;
+    GpuMat I1y_buf;
+
+    GpuMat I1w_buf;
+    GpuMat I1wx_buf;
+    GpuMat I1wy_buf;
+
+    GpuMat grad_buf;
+    GpuMat rho_c_buf;
+
+    GpuMat p11_buf;
+    GpuMat p12_buf;
+    GpuMat p21_buf;
+    GpuMat p22_buf;
+
+    GpuMat diff_buf;
+    GpuMat norm_buf;
+};
+
+//! Calculates optical flow for 2 images using block matching algorithm
+CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr,
+                                  Size block_size, Size shift_size, Size max_range, bool use_previous,
+                                  GpuMat& velx, GpuMat& vely, GpuMat& buf,
+                                  Stream& stream = Stream::Null());
+
+class CV_EXPORTS FastOpticalFlowBM
+{
+public:
+    void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null());
+
+private:
+    GpuMat buffer;
+    GpuMat extended_I0;
+    GpuMat extended_I1;
+};
+
+
+//! Interpolate frames (images) using provided optical flow (displacement field).
+//! frame0   - frame 0 (32-bit floating point images, single channel)
+//! frame1   - frame 1 (the same type and size)
+//! fu       - forward horizontal displacement
+//! fv       - forward vertical displacement
+//! bu       - backward horizontal displacement
+//! bv       - backward vertical displacement
+//! pos      - new frame position
+//! newFrame - new frame
+//! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat;
+//!            occlusion masks            0, occlusion masks            1,
+//!            interpolated forward flow  0, interpolated forward flow  1,
+//!            interpolated backward flow 0, interpolated backward flow 1
+//!
+CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
+                                  const GpuMat& fu, const GpuMat& fv,
+                                  const GpuMat& bu, const GpuMat& bv,
+                                  float pos, GpuMat& newFrame, GpuMat& buf,
+                                  Stream& stream = Stream::Null());
+
+CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
+
+//////////////////////// Background/foreground segmentation ////////////////////////
+
+// Foreground Object Detection from Videos Containing Complex Background.
+// Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian.
+// ACM MM2003 9p
+class CV_EXPORTS FGDStatModel
+{
+public:
+    struct CV_EXPORTS Params
+    {
+        int Lc;  // Quantized levels per 'color' component. Power of two, typically 32, 64 or 128.
+        int N1c; // Number of color vectors used to model normal background color variation at a given pixel.
+        int N2c; // Number of color vectors retained at given pixel.  Must be > N1c, typically ~ 5/3 of N1c.
+        // Used to allow the first N1c vectors to adapt over time to changing background.
+
+        int Lcc;  // Quantized levels per 'color co-occurrence' component.  Power of two, typically 16, 32 or 64.
+        int N1cc; // Number of color co-occurrence vectors used to model normal background color variation at a given pixel.
+        int N2cc; // Number of color co-occurrence vectors retained at given pixel.  Must be > N1cc, typically ~ 5/3 of N1cc.
+        // Used to allow the first N1cc vectors to adapt over time to changing background.
+
+        bool is_obj_without_holes; // If TRUE we ignore holes within foreground blobs. Defaults to TRUE.
+        int perform_morphing;     // Number of erode-dilate-erode foreground-blob cleanup iterations.
+        // These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1.
+
+        float alpha1; // How quickly we forget old background pixel values seen. Typically set to 0.1.
+        float alpha2; // "Controls speed of feature learning". Depends on T. Typical value circa 0.005.
+        float alpha3; // Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1.
+
+        float delta;   // Affects color and color co-occurrence quantization, typically set to 2.
+        float T;       // A percentage value which determines when new features can be recognized as new background. (Typically 0.9).
+        float minArea; // Discard foreground blobs whose bounding box is smaller than this threshold.
+
+        // default Params
+        Params();
+    };
+
+    // out_cn - channels count in output result (can be 3 or 4)
+    // 4-channels require more memory, but a bit faster
+    explicit FGDStatModel(int out_cn = 3);
+    explicit FGDStatModel(const cv::gpu::GpuMat& firstFrame, const Params& params = Params(), int out_cn = 3);
+
+    ~FGDStatModel();
+
+    void create(const cv::gpu::GpuMat& firstFrame, const Params& params = Params());
+    void release();
+
+    int update(const cv::gpu::GpuMat& curFrame);
+
+    //8UC3 or 8UC4 reference background image
+    cv::gpu::GpuMat background;
+
+    //8UC1 foreground image
+    cv::gpu::GpuMat foreground;
+
+    std::vector< std::vector<cv::Point> > foreground_regions;
+
+private:
+    FGDStatModel(const FGDStatModel&);
+    FGDStatModel& operator=(const FGDStatModel&);
+
+    class Impl;
+    std::auto_ptr<Impl> impl_;
+};
+
+/*!
+ Gaussian Mixture-based Background/Foreground Segmentation Algorithm
+
+ The class implements the following algorithm:
+ "An improved adaptive background mixture model for real-time tracking with shadow detection"
+ P. KaewTraKulPong and R. Bowden,
+ Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001.
+ http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+*/
+class CV_EXPORTS MOG_GPU
+{
+public:
+    //! the default constructor
+    MOG_GPU(int nmixtures = -1);
+
+    //! re-initialization method
+    void initialize(Size frameSize, int frameType);
+
+    //! the update operator
+    void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = 0.0f, Stream& stream = Stream::Null());
+
+    //! computes a background image which is the mean of all background gaussians
+    void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
+
+    //! releases all inner buffers
+    void release();
+
+    int history;
+    float varThreshold;
+    float backgroundRatio;
+    float noiseSigma;
+
+private:
+    int nmixtures_;
+
+    Size frameSize_;
+    int frameType_;
+    int nframes_;
+
+    GpuMat weight_;
+    GpuMat sortKey_;
+    GpuMat mean_;
+    GpuMat var_;
+};
+
+/*!
+ The class implements the following algorithm:
+ "Improved adaptive Gaussian mixture model for background subtraction"
+ Z.Zivkovic
+ International Conference Pattern Recognition, UK, August, 2004.
+ http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+*/
+class CV_EXPORTS MOG2_GPU
+{
+public:
+    //! the default constructor
+    MOG2_GPU(int nmixtures = -1);
+
+    //! re-initialization method
+    void initialize(Size frameSize, int frameType);
+
+    //! the update operator
+    void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
+
+    //! computes a background image which is the mean of all background gaussians
+    void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
+
+    //! releases all inner buffers
+    void release();
+
+    // parameters
+    // you should call initialize after parameters changes
+
+    int history;
+
+    //! here it is the maximum allowed number of mixture components.
+    //! Actual number is determined dynamically per pixel
+    float varThreshold;
+    // threshold on the squared Mahalanobis distance to decide if it is well described
+    // by the background model or not. Related to Cthr from the paper.
+    // This does not influence the update of the background. A typical value could be 4 sigma
+    // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+    /////////////////////////
+    // less important parameters - things you might change but be careful
+    ////////////////////////
+
+    float backgroundRatio;
+    // corresponds to fTB=1-cf from the paper
+    // TB - threshold when the component becomes significant enough to be included into
+    // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9.
+    // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+    // it is considered foreground
+    // float noiseSigma;
+    float varThresholdGen;
+
+    //corresponds to Tg - threshold on the squared Mahalan. dist. to decide
+    //when a sample is close to the existing components. If it is not close
+    //to any, a new component will be generated. I use 3 sigma => Tg=3*3=9.
+    //Smaller Tg leads to more generated components and higher Tg might
+    //lead to a small number of components, but they can grow too large
+    float fVarInit;
+    float fVarMin;
+    float fVarMax;
+
+    //initial variance  for the newly generated components.
+    //It will influence the speed of adaptation. A good guess should be made.
+    //A simple way is to estimate the typical standard deviation from the images.
+    //I used here 10 as a reasonable value
+    // min and max can be used to further control the variance
+    float fCT; //CT - complexity reduction prior
+    //this is related to the number of samples needed to accept that a component
+    //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+    //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+    //shadow detection parameters
+    bool bShadowDetection; //default 1 - do shadow detection
+    unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+    float fTau;
+    // Tau - shadow threshold. The shadow is detected if the pixel is darker
+    //version of the background. Tau is a threshold on how much darker the shadow can be.
+    //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+    //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+private:
+    int nmixtures_;
+
+    Size frameSize_;
+    int frameType_;
+    int nframes_;
+
+    GpuMat weight_;
+    GpuMat variance_;
+    GpuMat mean_;
+
+    GpuMat bgmodelUsedModes_; //keep track of number of modes per pixel
+};
+
+/**
+ * Background Subtractor module. Takes a series of images and returns a sequence of mask (8UC1)
+ * images of the same size, where 255 indicates Foreground and 0 represents Background.
+ * This class implements an algorithm described in "Visual Tracking of Human Visitors under
+ * Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere,
+ * A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012.
+ */
+class CV_EXPORTS GMG_GPU
+{
+public:
+    GMG_GPU();
+
+    /**
+     * Validate parameters and set up data structures for appropriate frame size.
+     * @param frameSize Input frame size
+     * @param min       Minimum value taken on by pixels in image sequence. Usually 0
+     * @param max       Maximum value taken on by pixels in image sequence. e.g. 1.0 or 255
+     */
+    void initialize(Size frameSize, float min = 0.0f, float max = 255.0f);
+
+    /**
+     * Performs single-frame background subtraction and builds up a statistical background image
+     * model.
+     * @param frame        Input frame
+     * @param fgmask       Output mask image representing foreground and background pixels
+     * @param stream       Stream for the asynchronous version
+     */
+    void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
+
+    //! Releases all inner buffers
+    void release();
+
+    //! Total number of distinct colors to maintain in histogram.
+    int maxFeatures;
+
+    //! Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms.
+    float learningRate;
+
+    //! Number of frames of video to use to initialize histograms.
+    int numInitializationFrames;
+
+    //! Number of discrete levels in each channel to be used in histograms.
+    int quantizationLevels;
+
+    //! Prior probability that any given pixel is a background pixel. A sensitivity parameter.
+    float backgroundPrior;
+
+    //! Value above which pixel is determined to be FG.
+    float decisionThreshold;
+
+    //! Smoothing radius, in pixels, for cleaning up FG image.
+    int smoothingRadius;
+
+    //! Perform background model update.
+    bool updateBackgroundModel;
+
+private:
+    float maxVal_, minVal_;
+
+    Size frameSize_;
+
+    int frameNum_;
+
+    GpuMat nfeatures_;
+    GpuMat colors_;
+    GpuMat weights_;
+
+    Ptr<FilterEngine_GPU> boxFilter_;
+    GpuMat buf_;
+};
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUVIDEO_HPP__ */
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp b/modules/gpuvideo/include/opencv2/gpuvideo/NCVBroxOpticalFlow.hpp
similarity index 100%
rename from modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
rename to modules/gpuvideo/include/opencv2/gpuvideo/NCVBroxOpticalFlow.hpp
diff --git a/modules/gpuvideo/perf/perf_main.cpp b/modules/gpuvideo/perf/perf_main.cpp
new file mode 100644
index 000000000..b35791cda
--- /dev/null
+++ b/modules/gpuvideo/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
+CV_PERF_TEST_MAIN(gpuvideo, printCudaInfo())
diff --git a/modules/gpuvideo/perf/perf_precomp.cpp b/modules/gpuvideo/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpuvideo/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpuvideo/perf/perf_precomp.hpp b/modules/gpuvideo/perf/perf_precomp.hpp
new file mode 100644
index 000000000..ecb314900
--- /dev/null
+++ b/modules/gpuvideo/perf/perf_precomp.hpp
@@ -0,0 +1,67 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpuvideo.hpp"
+#include "opencv2/gpuimgproc.hpp"
+
+#include "opencv2/video.hpp"
+#include "opencv2/legacy.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpuvideo/perf/perf_video.cpp
similarity index 100%
rename from modules/gpu/perf/perf_video.cpp
rename to modules/gpuvideo/perf/perf_video.cpp
diff --git a/modules/gpu/src/bgfg_gmg.cpp b/modules/gpuvideo/src/bgfg_gmg.cpp
similarity index 100%
rename from modules/gpu/src/bgfg_gmg.cpp
rename to modules/gpuvideo/src/bgfg_gmg.cpp
diff --git a/modules/gpu/src/bgfg_mog.cpp b/modules/gpuvideo/src/bgfg_mog.cpp
similarity index 100%
rename from modules/gpu/src/bgfg_mog.cpp
rename to modules/gpuvideo/src/bgfg_mog.cpp
diff --git a/modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu b/modules/gpuvideo/src/cuda/NCVBroxOpticalFlow.cu
similarity index 99%
rename from modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
rename to modules/gpuvideo/src/cuda/NCVBroxOpticalFlow.cu
index 4faba6331..427d4fa20 100644
--- a/modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
+++ b/modules/gpuvideo/src/cuda/NCVBroxOpticalFlow.cu
@@ -64,7 +64,7 @@
 #include "opencv2/core/cuda/utility.hpp"
 
 #include "opencv2/gpunvidia/NPP_staging.hpp"
-#include "opencv2/gpunvidia/NCVBroxOpticalFlow.hpp"
+#include "opencv2/gpuvideo/NCVBroxOpticalFlow.hpp"
 
 
 typedef NCVVectorAlloc<Ncv32f> FloatVector;
diff --git a/modules/gpu/src/cuda/bgfg_gmg.cu b/modules/gpuvideo/src/cuda/bgfg_gmg.cu
similarity index 100%
rename from modules/gpu/src/cuda/bgfg_gmg.cu
rename to modules/gpuvideo/src/cuda/bgfg_gmg.cu
diff --git a/modules/gpu/src/cuda/bgfg_mog.cu b/modules/gpuvideo/src/cuda/bgfg_mog.cu
similarity index 100%
rename from modules/gpu/src/cuda/bgfg_mog.cu
rename to modules/gpuvideo/src/cuda/bgfg_mog.cu
diff --git a/modules/gpu/src/cuda/fgd_bgfg.cu b/modules/gpuvideo/src/cuda/fgd_bgfg.cu
similarity index 100%
rename from modules/gpu/src/cuda/fgd_bgfg.cu
rename to modules/gpuvideo/src/cuda/fgd_bgfg.cu
diff --git a/modules/gpu/src/cuda/fgd_bgfg_common.hpp b/modules/gpuvideo/src/cuda/fgd_bgfg_common.hpp
similarity index 100%
rename from modules/gpu/src/cuda/fgd_bgfg_common.hpp
rename to modules/gpuvideo/src/cuda/fgd_bgfg_common.hpp
diff --git a/modules/gpu/src/cuda/optflowbm.cu b/modules/gpuvideo/src/cuda/optflowbm.cu
similarity index 100%
rename from modules/gpu/src/cuda/optflowbm.cu
rename to modules/gpuvideo/src/cuda/optflowbm.cu
diff --git a/modules/gpu/src/cuda/optical_flow.cu b/modules/gpuvideo/src/cuda/optical_flow.cu
similarity index 100%
rename from modules/gpu/src/cuda/optical_flow.cu
rename to modules/gpuvideo/src/cuda/optical_flow.cu
diff --git a/modules/gpu/src/cuda/optical_flow_farneback.cu b/modules/gpuvideo/src/cuda/optical_flow_farneback.cu
similarity index 100%
rename from modules/gpu/src/cuda/optical_flow_farneback.cu
rename to modules/gpuvideo/src/cuda/optical_flow_farneback.cu
diff --git a/modules/gpu/src/cuda/pyrlk.cu b/modules/gpuvideo/src/cuda/pyrlk.cu
similarity index 100%
rename from modules/gpu/src/cuda/pyrlk.cu
rename to modules/gpuvideo/src/cuda/pyrlk.cu
diff --git a/modules/gpu/src/cuda/tvl1flow.cu b/modules/gpuvideo/src/cuda/tvl1flow.cu
similarity index 100%
rename from modules/gpu/src/cuda/tvl1flow.cu
rename to modules/gpuvideo/src/cuda/tvl1flow.cu
diff --git a/modules/gpu/src/fgd_bgfg.cpp b/modules/gpuvideo/src/fgd_bgfg.cpp
similarity index 100%
rename from modules/gpu/src/fgd_bgfg.cpp
rename to modules/gpuvideo/src/fgd_bgfg.cpp
diff --git a/modules/gpu/src/optflowbm.cpp b/modules/gpuvideo/src/optflowbm.cpp
similarity index 100%
rename from modules/gpu/src/optflowbm.cpp
rename to modules/gpuvideo/src/optflowbm.cpp
diff --git a/modules/gpu/src/optical_flow.cpp b/modules/gpuvideo/src/optical_flow.cpp
similarity index 100%
rename from modules/gpu/src/optical_flow.cpp
rename to modules/gpuvideo/src/optical_flow.cpp
diff --git a/modules/gpu/src/optical_flow_farneback.cpp b/modules/gpuvideo/src/optical_flow_farneback.cpp
similarity index 100%
rename from modules/gpu/src/optical_flow_farneback.cpp
rename to modules/gpuvideo/src/optical_flow_farneback.cpp
diff --git a/modules/gpuvideo/src/precomp.cpp b/modules/gpuvideo/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpuvideo/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpuvideo/src/precomp.hpp b/modules/gpuvideo/src/precomp.hpp
new file mode 100644
index 000000000..e105817a1
--- /dev/null
+++ b/modules/gpuvideo/src/precomp.hpp
@@ -0,0 +1,69 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <limits>
+
+#include "opencv2/gpuvideo.hpp"
+
+#include "opencv2/gpuarithm.hpp"
+#include "opencv2/gpufilters.hpp"
+#include "opencv2/gpuimgproc.hpp"
+
+#include "opencv2/video.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUNVIDIA
+#  include "opencv2/gpunvidia/private.hpp"
+#  include "opencv2/gpuvideo/NCVBroxOpticalFlow.hpp"
+#endif
+
+#ifdef HAVE_CUDA
+#  include "cuda/fgd_bgfg_common.hpp"
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/src/pyrlk.cpp b/modules/gpuvideo/src/pyrlk.cpp
similarity index 100%
rename from modules/gpu/src/pyrlk.cpp
rename to modules/gpuvideo/src/pyrlk.cpp
diff --git a/modules/gpu/src/tvl1flow.cpp b/modules/gpuvideo/src/tvl1flow.cpp
similarity index 100%
rename from modules/gpu/src/tvl1flow.cpp
rename to modules/gpuvideo/src/tvl1flow.cpp
diff --git a/modules/gpu/test/test_bgfg.cpp b/modules/gpuvideo/test/test_bgfg.cpp
similarity index 100%
rename from modules/gpu/test/test_bgfg.cpp
rename to modules/gpuvideo/test/test_bgfg.cpp
diff --git a/modules/gpuvideo/test/test_main.cpp b/modules/gpuvideo/test/test_main.cpp
new file mode 100644
index 000000000..eea3d7c00
--- /dev/null
+++ b/modules/gpuvideo/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpu/test/test_optflow.cpp b/modules/gpuvideo/test/test_optflow.cpp
similarity index 100%
rename from modules/gpu/test/test_optflow.cpp
rename to modules/gpuvideo/test/test_optflow.cpp
diff --git a/modules/gpuvideo/test/test_precomp.cpp b/modules/gpuvideo/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpuvideo/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpuvideo/test/test_precomp.hpp b/modules/gpuvideo/test/test_precomp.hpp
new file mode 100644
index 000000000..b2b141aa4
--- /dev/null
+++ b/modules/gpuvideo/test/test_precomp.hpp
@@ -0,0 +1,65 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include <fstream>
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpuvideo.hpp"
+#include "opencv2/gpuimgproc.hpp"
+
+#include "opencv2/video.hpp"
+#include "opencv2/legacy.hpp"
+
+#endif
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index 06ccb4da0..3cf2391e5 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -21,6 +21,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufilters/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuvideo/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index f1382269a..9ee7fe8f2 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc opencv_gpufeatures2d)
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 
diff --git a/samples/gpu/opticalflow_nvidia_api.cpp b/samples/gpu/opticalflow_nvidia_api.cpp
index e4fc93cd5..3c11029d1 100644
--- a/samples/gpu/opticalflow_nvidia_api.cpp
+++ b/samples/gpu/opticalflow_nvidia_api.cpp
@@ -17,6 +17,7 @@
 
 #ifdef HAVE_CUDA
 #include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpuvideo/NCVBroxOpticalFlow.hpp"
 #endif
 
 #if !defined(HAVE_CUDA)

From b08b9ab83b4b05e937a5e464b01f6ace057de8dc Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 17:51:19 +0400
Subject: [PATCH 17/49] gpucalib3d module for camera calibration and stereo
 correspondence

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/doc/gpu.rst                       |   1 -
 modules/gpu/include/opencv2/gpu.hpp           | 186 +------------
 modules/gpucalib3d/CMakeLists.txt             |   9 +
 ...mera_calibration_and_3d_reconstruction.rst |   0
 modules/gpucalib3d/doc/gpucalib3d.rst         |   8 +
 .../gpucalib3d/include/opencv2/gpucalib3d.hpp | 255 ++++++++++++++++++
 .../{gpu => gpucalib3d}/perf/perf_calib3d.cpp |   0
 modules/gpucalib3d/perf/perf_main.cpp         |  47 ++++
 modules/gpucalib3d/perf/perf_precomp.cpp      |  43 +++
 modules/gpucalib3d/perf/perf_precomp.hpp      |  65 +++++
 modules/{gpu => gpucalib3d}/src/calib3d.cpp   |  78 +++++-
 .../{gpu => gpucalib3d}/src/cuda/calib3d.cu   | 183 +++++++++++++
 .../src/cuda/disp_bilateral_filter.cu         |   0
 .../{gpu => gpucalib3d}/src/cuda/stereobm.cu  |   0
 .../{gpu => gpucalib3d}/src/cuda/stereobp.cu  |   0
 .../src/cuda/stereocsbp.cu                    |   0
 .../src/disparity_bilateral_filter.cpp        |   0
 modules/gpucalib3d/src/precomp.cpp            |  43 +++
 modules/gpucalib3d/src/precomp.hpp            |  56 ++++
 modules/{gpu => gpucalib3d}/src/stereobm.cpp  |   0
 modules/{gpu => gpucalib3d}/src/stereobp.cpp  |   0
 .../{gpu => gpucalib3d}/src/stereocsbp.cpp    |   0
 .../{gpu => gpucalib3d}/test/test_calib3d.cpp |   0
 modules/gpucalib3d/test/test_main.cpp         |  45 ++++
 modules/gpucalib3d/test/test_precomp.cpp      |  43 +++
 modules/gpucalib3d/test/test_precomp.hpp      |  61 +++++
 .../gpuimgproc/include/opencv2/gpuimgproc.hpp |  12 -
 modules/gpuimgproc/src/cuda/imgproc.cu        | 181 -------------
 modules/gpuimgproc/src/imgproc.cpp            |  70 -----
 samples/cpp/CMakeLists.txt                    |   1 +
 samples/gpu/CMakeLists.txt                    |   3 +-
 32 files changed, 932 insertions(+), 460 deletions(-)
 create mode 100644 modules/gpucalib3d/CMakeLists.txt
 rename modules/{gpu => gpucalib3d}/doc/camera_calibration_and_3d_reconstruction.rst (100%)
 create mode 100644 modules/gpucalib3d/doc/gpucalib3d.rst
 create mode 100644 modules/gpucalib3d/include/opencv2/gpucalib3d.hpp
 rename modules/{gpu => gpucalib3d}/perf/perf_calib3d.cpp (100%)
 create mode 100644 modules/gpucalib3d/perf/perf_main.cpp
 create mode 100644 modules/gpucalib3d/perf/perf_precomp.cpp
 create mode 100644 modules/gpucalib3d/perf/perf_precomp.hpp
 rename modules/{gpu => gpucalib3d}/src/calib3d.cpp (80%)
 rename modules/{gpu => gpucalib3d}/src/cuda/calib3d.cu (58%)
 rename modules/{gpu => gpucalib3d}/src/cuda/disp_bilateral_filter.cu (100%)
 rename modules/{gpu => gpucalib3d}/src/cuda/stereobm.cu (100%)
 rename modules/{gpu => gpucalib3d}/src/cuda/stereobp.cu (100%)
 rename modules/{gpu => gpucalib3d}/src/cuda/stereocsbp.cu (100%)
 rename modules/{gpu => gpucalib3d}/src/disparity_bilateral_filter.cpp (100%)
 create mode 100644 modules/gpucalib3d/src/precomp.cpp
 create mode 100644 modules/gpucalib3d/src/precomp.hpp
 rename modules/{gpu => gpucalib3d}/src/stereobm.cpp (100%)
 rename modules/{gpu => gpucalib3d}/src/stereobp.cpp (100%)
 rename modules/{gpu => gpucalib3d}/src/stereocsbp.cpp (100%)
 rename modules/{gpu => gpucalib3d}/test/test_calib3d.cpp (100%)
 create mode 100644 modules/gpucalib3d/test/test_main.cpp
 create mode 100644 modules/gpucalib3d/test/test_precomp.cpp
 create mode 100644 modules/gpucalib3d/test/test_precomp.hpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 296545add..95de6789f 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -5,7 +5,7 @@ endif()
 set(the_description "GPU-accelerated Computer Vision")
 
 ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy
-                   opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo
+                   opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d
                    OPTIONAL opencv_gpunvidia)
 
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index 3803efd19..bc3b9bdb1 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -9,4 +9,3 @@ gpu. GPU-accelerated Computer Vision
     initalization_and_information
     data_structures
     object_detection
-    camera_calibration_and_3d_reconstruction
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 524a32fc9..b3fea3fbf 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -55,6 +55,7 @@
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpufeatures2d.hpp"
 #include "opencv2/gpuvideo.hpp"
+#include "opencv2/gpucalib3d.hpp"
 
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect.hpp"
@@ -68,18 +69,6 @@ namespace cv { namespace gpu {
 
 ///////////////////////////// Calibration 3D //////////////////////////////////
 
-CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                                GpuMat& dst, Stream& stream = Stream::Null());
-
-CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                              const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
-                              Stream& stream = Stream::Null());
-
-CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
-                               const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
-                               int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
-                               std::vector<int>* inliers=NULL);
-
 //////////////////////////////// Image Labeling ////////////////////////////////
 
 
@@ -90,190 +79,17 @@ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& c
 
 //////////////////////////////// StereoBM_GPU ////////////////////////////////
 
-class CV_EXPORTS StereoBM_GPU
-{
-public:
-    enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
 
-    enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
-
-    //! the default constructor
-    StereoBM_GPU();
-    //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
-    StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
-
-    //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
-    //! Output disparity has CV_8U type.
-    void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
-
-    //! Some heuristics that tries to estmate
-    // if current GPU will be faster than CPU in this algorithm.
-    // It queries current active device.
-    static bool checkIfGpuCallReasonable();
-
-    int preset;
-    int ndisp;
-    int winSize;
-
-    // If avergeTexThreshold  == 0 => post procesing is disabled
-    // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
-    // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
-    // i.e. input left image is low textured.
-    float avergeTexThreshold;
-
-private:
-    GpuMat minSSD, leBuf, riBuf;
-};
 
 ////////////////////////// StereoBeliefPropagation ///////////////////////////
-// "Efficient Belief Propagation for Early Vision"
-// P.Felzenszwalb
 
-class CV_EXPORTS StereoBeliefPropagation
-{
-public:
-    enum { DEFAULT_NDISP  = 64 };
-    enum { DEFAULT_ITERS  = 5  };
-    enum { DEFAULT_LEVELS = 5  };
-
-    static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels);
-
-    //! the default constructor
-    explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
-                                     int iters  = DEFAULT_ITERS,
-                                     int levels = DEFAULT_LEVELS,
-                                     int msg_type = CV_32F);
-
-    //! the full constructor taking the number of disparities, number of BP iterations on each level,
-    //! number of levels, truncation of data cost, data weight,
-    //! truncation of discontinuity cost and discontinuity single jump
-    //! DataTerm = data_weight * min(fabs(I2-I1), max_data_term)
-    //! DiscTerm = min(disc_single_jump * fabs(f1-f2), max_disc_term)
-    //! please see paper for more details
-    StereoBeliefPropagation(int ndisp, int iters, int levels,
-        float max_data_term, float data_weight,
-        float max_disc_term, float disc_single_jump,
-        int msg_type = CV_32F);
-
-    //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
-    //! if disparity is empty output type will be CV_16S else output type will be disparity.type().
-    void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
-
-
-    //! version for user specified data term
-    void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null());
-
-    int ndisp;
-
-    int iters;
-    int levels;
-
-    float max_data_term;
-    float data_weight;
-    float max_disc_term;
-    float disc_single_jump;
-
-    int msg_type;
-private:
-    GpuMat u, d, l, r, u2, d2, l2, r2;
-    std::vector<GpuMat> datas;
-    GpuMat out;
-};
 
 /////////////////////////// StereoConstantSpaceBP ///////////////////////////
-// "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
-// Qingxiong Yang, Liang Wang, Narendra Ahuja
-// http://vision.ai.uiuc.edu/~qyang6/
 
-class CV_EXPORTS StereoConstantSpaceBP
-{
-public:
-    enum { DEFAULT_NDISP    = 128 };
-    enum { DEFAULT_ITERS    = 8   };
-    enum { DEFAULT_LEVELS   = 4   };
-    enum { DEFAULT_NR_PLANE = 4   };
 
-    static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane);
-
-    //! the default constructor
-    explicit StereoConstantSpaceBP(int ndisp    = DEFAULT_NDISP,
-                                   int iters    = DEFAULT_ITERS,
-                                   int levels   = DEFAULT_LEVELS,
-                                   int nr_plane = DEFAULT_NR_PLANE,
-                                   int msg_type = CV_32F);
-
-    //! the full constructor taking the number of disparities, number of BP iterations on each level,
-    //! number of levels, number of active disparity on the first level, truncation of data cost, data weight,
-    //! truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold
-    StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
-        float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
-        int min_disp_th = 0,
-        int msg_type = CV_32F);
-
-    //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
-    //! if disparity is empty output type will be CV_16S else output type will be disparity.type().
-    void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
-
-    int ndisp;
-
-    int iters;
-    int levels;
-
-    int nr_plane;
-
-    float max_data_term;
-    float data_weight;
-    float max_disc_term;
-    float disc_single_jump;
-
-    int min_disp_th;
-
-    int msg_type;
-
-    bool use_local_init_data_cost;
-private:
-    GpuMat messages_buffers;
-
-    GpuMat temp;
-    GpuMat out;
-};
 
 /////////////////////////// DisparityBilateralFilter ///////////////////////////
-// Disparity map refinement using joint bilateral filtering given a single color image.
-// Qingxiong Yang, Liang Wang, Narendra Ahuja
-// http://vision.ai.uiuc.edu/~qyang6/
 
-class CV_EXPORTS DisparityBilateralFilter
-{
-public:
-    enum { DEFAULT_NDISP  = 64 };
-    enum { DEFAULT_RADIUS = 3 };
-    enum { DEFAULT_ITERS  = 1 };
-
-    //! the default constructor
-    explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS);
-
-    //! the full constructor taking the number of disparities, filter radius,
-    //! number of iterations, truncation of data continuity, truncation of disparity continuity
-    //! and filter range sigma
-    DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range);
-
-    //! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image.
-    //! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type.
-    void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null());
-
-private:
-    int ndisp;
-    int radius;
-    int iters;
-
-    float edge_threshold;
-    float max_disc_threshold;
-    float sigma_range;
-
-    GpuMat table_color;
-    GpuMat table_space;
-};
 
 
 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
diff --git a/modules/gpucalib3d/CMakeLists.txt b/modules/gpucalib3d/CMakeLists.txt
new file mode 100644
index 000000000..bb949c4d3
--- /dev/null
+++ b/modules/gpucalib3d/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)  # gpucalib3d is switched off when targeting Android or iOS
+  ocv_module_disable(gpucalib3d)
+endif()
+
+set(the_description "GPU-accelerated Camera Calibration and 3D Reconstruction")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)  # presumably strips these two warning flags for this module -- confirm against the macro
+
+ocv_define_module(gpucalib3d opencv_calib3d opencv_gpuarithm)  # presumably the names after the module name are its dependencies -- confirm against the macro
diff --git a/modules/gpu/doc/camera_calibration_and_3d_reconstruction.rst b/modules/gpucalib3d/doc/camera_calibration_and_3d_reconstruction.rst
similarity index 100%
rename from modules/gpu/doc/camera_calibration_and_3d_reconstruction.rst
rename to modules/gpucalib3d/doc/camera_calibration_and_3d_reconstruction.rst
diff --git a/modules/gpucalib3d/doc/gpucalib3d.rst b/modules/gpucalib3d/doc/gpucalib3d.rst
new file mode 100644
index 000000000..5dffaa048
--- /dev/null
+++ b/modules/gpucalib3d/doc/gpucalib3d.rst
@@ -0,0 +1,8 @@
+********************************************************************
+gpucalib3d. GPU-accelerated Camera Calibration and 3D Reconstruction
+********************************************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    camera_calibration_and_3d_reconstruction
diff --git a/modules/gpucalib3d/include/opencv2/gpucalib3d.hpp b/modules/gpucalib3d/include/opencv2/gpucalib3d.hpp
new file mode 100644
index 000000000..3496d987b
--- /dev/null
+++ b/modules/gpucalib3d/include/opencv2/gpucalib3d.hpp
@@ -0,0 +1,255 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUCALIB3D_HPP__
+#define __OPENCV_GPUCALIB3D_HPP__
+
+#include "opencv2/core/gpumat.hpp"
+
+namespace cv { namespace gpu {
+
+class CV_EXPORTS StereoBM_GPU
+{
+public:
+    enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
+
+    enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
+
+    //! the default constructor
+    StereoBM_GPU();
+    //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
+    StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
+
+    //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
+    //! Output disparity has CV_8U type.
+    void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
+
+    //! A heuristic that tries to estimate
+    // whether the current GPU will be faster than the CPU for this algorithm.
+    // It queries the currently active device.
+    static bool checkIfGpuCallReasonable();
+
+    int preset;
+    int ndisp;
+    int winSize;
+
+    // If avergeTexThreshold == 0 => post-processing is disabled.
+    // If avergeTexThreshold != 0 then disparity is set to 0 at each point (x,y) where, for the left image,
+    // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
+    // i.e. the input left image is low-textured there.
+    float avergeTexThreshold;
+
+private:
+    GpuMat minSSD, leBuf, riBuf;
+};
+
+// "Efficient Belief Propagation for Early Vision"
+// P.Felzenszwalb
+class CV_EXPORTS StereoBeliefPropagation
+{
+public:
+    enum { DEFAULT_NDISP  = 64 };
+    enum { DEFAULT_ITERS  = 5  };
+    enum { DEFAULT_LEVELS = 5  };
+
+    static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels);
+
+    //! the default constructor
+    explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
+                                     int iters  = DEFAULT_ITERS,
+                                     int levels = DEFAULT_LEVELS,
+                                     int msg_type = CV_32F);
+
+    //! the full constructor taking the number of disparities, number of BP iterations on each level,
+    //! number of levels, truncation of data cost, data weight,
+    //! truncation of discontinuity cost and discontinuity single jump
+    //! DataTerm = data_weight * min(fabs(I2-I1), max_data_term)
+    //! DiscTerm = min(disc_single_jump * fabs(f1-f2), max_disc_term)
+    //! please see paper for more details
+    StereoBeliefPropagation(int ndisp, int iters, int levels,
+        float max_data_term, float data_weight,
+        float max_disc_term, float disc_single_jump,
+        int msg_type = CV_32F);
+
+    //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
+    //! if disparity is empty output type will be CV_16S else output type will be disparity.type().
+    void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
+
+
+    //! version for user specified data term
+    void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null());
+
+    int ndisp;
+
+    int iters;
+    int levels;
+
+    float max_data_term;
+    float data_weight;
+    float max_disc_term;
+    float disc_single_jump;
+
+    int msg_type;
+private:
+    GpuMat u, d, l, r, u2, d2, l2, r2;
+    std::vector<GpuMat> datas;
+    GpuMat out;
+};
+
+// "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
+// Qingxiong Yang, Liang Wang, Narendra Ahuja
+// http://vision.ai.uiuc.edu/~qyang6/
+class CV_EXPORTS StereoConstantSpaceBP
+{
+public:
+    enum { DEFAULT_NDISP    = 128 };
+    enum { DEFAULT_ITERS    = 8   };
+    enum { DEFAULT_LEVELS   = 4   };
+    enum { DEFAULT_NR_PLANE = 4   };
+
+    static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane);
+
+    //! the default constructor
+    explicit StereoConstantSpaceBP(int ndisp    = DEFAULT_NDISP,
+                                   int iters    = DEFAULT_ITERS,
+                                   int levels   = DEFAULT_LEVELS,
+                                   int nr_plane = DEFAULT_NR_PLANE,
+                                   int msg_type = CV_32F);
+
+    //! the full constructor taking the number of disparities, number of BP iterations on each level,
+    //! number of levels, number of active disparity on the first level, truncation of data cost, data weight,
+    //! truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold
+    StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
+        float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
+        int min_disp_th = 0,
+        int msg_type = CV_32F);
+
+    //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
+    //! if disparity is empty output type will be CV_16S else output type will be disparity.type().
+    void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
+
+    int ndisp;
+
+    int iters;
+    int levels;
+
+    int nr_plane;
+
+    float max_data_term;
+    float data_weight;
+    float max_disc_term;
+    float disc_single_jump;
+
+    int min_disp_th;
+
+    int msg_type;
+
+    bool use_local_init_data_cost;
+private:
+    GpuMat messages_buffers;
+
+    GpuMat temp;
+    GpuMat out;
+};
+
+// Disparity map refinement using joint bilateral filtering given a single color image.
+// Qingxiong Yang, Liang Wang, Narendra Ahuja
+// http://vision.ai.uiuc.edu/~qyang6/
+class CV_EXPORTS DisparityBilateralFilter
+{
+public:
+    enum { DEFAULT_NDISP  = 64 };
+    enum { DEFAULT_RADIUS = 3 };
+    enum { DEFAULT_ITERS  = 1 };
+
+    //! the default constructor
+    explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS);
+
+    //! the full constructor taking the number of disparities, filter radius,
+    //! number of iterations, truncation of data continuity, truncation of disparity continuity
+    //! and filter range sigma
+    DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range);
+
+    //! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image.
+    //! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type.
+    void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null());
+
+private:
+    int ndisp;
+    int radius;
+    int iters;
+
+    float edge_threshold;
+    float max_disc_threshold;
+    float sigma_range;
+
+    GpuMat table_color;
+    GpuMat table_space;
+};
+
+CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
+                                GpuMat& dst, Stream& stream = Stream::Null());
+
+CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
+                              const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
+                              Stream& stream = Stream::Null());
+
+CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
+                               const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
+                               int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
+                               std::vector<int>* inliers=NULL);
+
+//! Reprojects disparity image to 3D space.
+//! Supports CV_8U and CV_16S types of input disparity.
+//! The output is a 3- or 4-channel floating-point matrix.
+//! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map.
+//! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify.
+CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null());
+
+//! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV.
+//! Supported types of input disparity: CV_8U, CV_16S.
+//! Output disparity has CV_8UC4 type in BGRA format (alpha = 255).
+CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null());
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUCALIB3D_HPP__ */
diff --git a/modules/gpu/perf/perf_calib3d.cpp b/modules/gpucalib3d/perf/perf_calib3d.cpp
similarity index 100%
rename from modules/gpu/perf/perf_calib3d.cpp
rename to modules/gpucalib3d/perf/perf_calib3d.cpp
diff --git a/modules/gpucalib3d/perf/perf_main.cpp b/modules/gpucalib3d/perf/perf_main.cpp
new file mode 100644
index 000000000..b35791cda
--- /dev/null
+++ b/modules/gpucalib3d/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
+CV_PERF_TEST_MAIN(gpucalib3d, printCudaInfo())
diff --git a/modules/gpucalib3d/perf/perf_precomp.cpp b/modules/gpucalib3d/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpucalib3d/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpucalib3d/perf/perf_precomp.hpp b/modules/gpucalib3d/perf/perf_precomp.hpp
new file mode 100644
index 000000000..dc244a72a
--- /dev/null
+++ b/modules/gpucalib3d/perf/perf_precomp.hpp
@@ -0,0 +1,65 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpucalib3d.hpp"
+
+#include "opencv2/calib3d.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpu/src/calib3d.cpp b/modules/gpucalib3d/src/calib3d.cpp
similarity index 80%
rename from modules/gpu/src/calib3d.cpp
rename to modules/gpucalib3d/src/calib3d.cpp
index abcc3423d..135859094 100644
--- a/modules/gpu/src/calib3d.cpp
+++ b/modules/gpucalib3d/src/calib3d.cpp
@@ -48,10 +48,10 @@ using namespace cv::gpu;
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
 void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
-
 void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
-
 void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, std::vector<int>*) { throw_no_cuda(); }
+void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, int, Stream&) { throw_no_cuda(); }
+void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -150,7 +150,7 @@ namespace
     }
 
     // Computes rotation, translation pair for small subsets if the input data
-    class TransformHypothesesGenerator
+    class TransformHypothesesGenerator : public cv::ParallelLoopBody
     {
     public:
         TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
@@ -160,7 +160,7 @@ namespace
                   num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
                   transl_vectors(transl_vectors_) {}
 
-        void operator()(const BlockedRange& range) const
+        void operator()(const Range& range) const
         {
             // Input data for generation of the current hypothesis
             std::vector<int> subset_indices(subset_size);
@@ -172,7 +172,7 @@ namespace
             Mat rot_mat(3, 3, CV_64F);
             Mat transl_vec(1, 3, CV_64F);
 
-            for (int iter = range.begin(); iter < range.end(); ++iter)
+            for (int iter = range.start; iter < range.end; ++iter)
             {
                 selectRandom(subset_size, num_points, subset_indices);
                 for (int i = 0; i < subset_size; ++i)
@@ -238,7 +238,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
     // Generate set of hypotheses using small subsets of the input data
     TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
                                       num_points, subset_size, rot_matrices, transl_vectors);
-    parallel_for(BlockedRange(0, num_iters), body);
+    parallel_for_(Range(0, num_iters), body);
 
     // Compute scores (i.e. number of inliers) for each hypothesis
     GpuMat d_object(object);
@@ -252,7 +252,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
     // Find the best hypothesis index
     Point best_idx;
     double best_score;
-    minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
+    gpu::minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
     int num_inliers = static_cast<int>(best_score);
 
     // Extract the best hypothesis data
@@ -289,6 +289,66 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+// reprojectImageTo3D
+
+namespace cv { namespace gpu { namespace cudev
+{
+    template <typename T, typename D>
+    void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+}}}
+
+void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q, int dst_cn, Stream& stream)
+{
+    using namespace cv::gpu::cudev;
+
+    typedef void (*func_t)(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+    static const func_t funcs[2][4] = // indexed as [dst_cn == 4][disp.type()]; row 0 produces float3, row 1 float4
+    {
+        {reprojectImageTo3D_gpu<uchar, float3>, 0, 0, reprojectImageTo3D_gpu<short, float3>},
+        {reprojectImageTo3D_gpu<uchar, float4>, 0, 0, reprojectImageTo3D_gpu<short, float4>}
+    };
+
+    CV_Assert(disp.type() == CV_8U || disp.type() == CV_16S); // only these two types have a non-null entry in funcs
+    CV_Assert(Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4 && Q.isContinuous()); // Q is handed over below as 16 contiguous floats
+    CV_Assert(dst_cn == 3 || dst_cn == 4);
+
+    xyz.create(disp.size(), CV_MAKE_TYPE(CV_32F, dst_cn)); // output: same size as disp, 32-bit float, dst_cn channels
+
+    funcs[dst_cn == 4][disp.type()](disp, xyz, Q.ptr<float>(), StreamAccessor::getStream(stream));
+}
+
+////////////////////////////////////////////////////////////////////////
+// drawColorDisp
+
+namespace cv { namespace gpu { namespace cudev
+{
+    void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
+    void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
+}}}
+
+namespace
+{
+    template <typename T>
+    void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
+    {
+        using namespace ::cv::gpu::cudev;
+
+        dst.create(src.size(), CV_8UC4);
+
+        drawColorDisp_gpu((PtrStepSz<T>)src, dst, ndisp, stream);
+    }
+
+    typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);
+
+    const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};
+}
+
+void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)
+{
+    CV_Assert(src.type() == CV_8U || src.type() == CV_16S);
+
+    drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));
+}
+
 #endif
-
-
diff --git a/modules/gpu/src/cuda/calib3d.cu b/modules/gpucalib3d/src/cuda/calib3d.cu
similarity index 58%
rename from modules/gpu/src/cuda/calib3d.cu
rename to modules/gpucalib3d/src/cuda/calib3d.cu
index 6085e716d..d1d59ce23 100644
--- a/modules/gpu/src/cuda/calib3d.cu
+++ b/modules/gpucalib3d/src/cuda/calib3d.cu
@@ -187,6 +187,189 @@ namespace cv { namespace gpu { namespace cudev
             cudaSafeCall( cudaDeviceSynchronize() );
         }
     } // namespace solvepnp_ransac
+
+
+
+    /////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
+
+    __constant__ float cq[16];
+
+    template <typename T, typename D> // T: disparity element type, D: output vector type
+    __global__ void reprojectImageTo3D(const PtrStepSz<T> disp, PtrStep<D> xyz)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x; // one thread per pixel
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (y >= disp.rows || x >= disp.cols)
+            return;
+
+        const float qx = x * cq[ 0] + y * cq[ 1] + cq[ 3]; // rows of the 4x4 matrix in cq applied to (x, y, *, 1);
+        const float qy = x * cq[ 4] + y * cq[ 5] + cq[ 7]; // the third-column (disparity) term of each row
+        const float qz = x * cq[ 8] + y * cq[ 9] + cq[11]; // is added further down once d has been read
+        const float qw = x * cq[12] + y * cq[13] + cq[15];
+
+        const T d = disp(y, x);
+
+        const float iW = 1.f / (qw + cq[14] * d); // reciprocal of the homogeneous w coordinate
+
+        D v = VecTraits<D>::all(1.0f); // every component starts at 1; x, y, z are overwritten below
+        v.x = (qx + cq[2] * d) * iW;
+        v.y = (qy + cq[6] * d) * iW;
+        v.z = (qz + cq[10] * d) * iW;
+
+        xyz(y, x) = v;
+    }
+
+    template <typename T, typename D> // host-side launcher for the reprojectImageTo3D<T, D> kernel
+    void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream)
+    {
+        dim3 block(32, 8);
+        dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y)); // enough blocks to cover every pixel of disp
+
+        cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) ); // NOTE(review): this copy into cq is not issued on `stream` -- confirm it cannot race with a kernel still reading cq
+
+        reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz);
+        cudaSafeCall( cudaGetLastError() ); // reports launch errors
+
+        if (stream == 0) // only the default stream is waited on here
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+    template void reprojectImageTo3D_gpu<uchar, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+    template void reprojectImageTo3D_gpu<short, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+    template void reprojectImageTo3D_gpu<short, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+
+    /////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
+
+    template <typename T> // maps one disparity value to a packed 32-bit colour through an HSV -> RGB conversion
+    __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
+    {
+        unsigned int H = ((ndisp-d) * 240)/ndisp; // hue: d == 0 gives 240, d == ndisp gives 0
+
+        unsigned int hi = (H/60) % 6; // index of the 60-wide hue sector
+        float f = H/60.f - H/60; // fractional position inside the sector (H/60 is an integer division)
+        float p = V * (1 - S);
+        float q = V * (1 - f * S);
+        float t = V * (1 - (1 - f) * S);
+
+        float3 res; // stored as x = B, y = G, z = R (see the packing at the end)
+
+        if (hi == 0) // R = V, G = t, B = p
+        {
+            res.x = p;
+            res.y = t;
+            res.z = V;
+        }
+
+        if (hi == 1) // R = q, G = V, B = p
+        {
+            res.x = p;
+            res.y = V;
+            res.z = q;
+        }
+
+        if (hi == 2) // R = p, G = V, B = t
+        {
+            res.x = t;
+            res.y = V;
+            res.z = p;
+        }
+
+        if (hi == 3) // R = p, G = q, B = V
+        {
+            res.x = V;
+            res.y = q;
+            res.z = p;
+        }
+
+        if (hi == 4) // R = t, G = p, B = V
+        {
+            res.x = V;
+            res.y = p;
+            res.z = t;
+        }
+
+        if (hi == 5) // R = V, G = p, B = q
+        {
+            res.x = q;
+            res.y = p;
+            res.z = V;
+        }
+        const unsigned int b = (unsigned int)(::max(0.f, ::min(res.x, 1.f)) * 255.f); // clamp to [0, 1], then scale to 0..255
+        const unsigned int g = (unsigned int)(::max(0.f, ::min(res.y, 1.f)) * 255.f);
+        const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f);
+        const unsigned int a = 255U; // alpha is always 255
+
+        return (a << 24) + (r << 16) + (g << 8) + b; // packed as 0xAARRGGBB
+    }
+
+    __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
+    {
+        const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; // each thread handles 4 consecutive pixels; x is the first of them
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if(x < width && y < height)
+        {
+            uchar4 d4 = *(uchar4*)(disp + y * disp_step + x); // NOTE(review): loads 4 pixels even if fewer remain in the row -- confirm the row pitch covers the tail
+
+            uint4 res;
+            res.x = cvtPixel(d4.x, ndisp);
+            res.y = cvtPixel(d4.y, ndisp);
+            res.z = cvtPixel(d4.z, ndisp);
+            res.w = cvtPixel(d4.w, ndisp);
+
+            uint4* line = (uint4*)(out_image + y * out_step); // output row viewed as groups of 4 packed colours
+            line[x >> 2] = res; // NOTE(review): stores all 4 output pixels regardless of width -- same tail caveat as the load
+        }
+    }
+
+    __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
+    {
+        const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; // each thread handles 2 consecutive pixels; x is the first of them
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if(x < width && y < height)
+        {
+            short2 d2 = *(short2*)(disp + y * disp_step + x); // disp_step is used in short elements here; NOTE(review): loads 2 pixels even if only 1 remains in the row
+
+            uint2 res;
+            res.x = cvtPixel(d2.x, ndisp);
+            res.y = cvtPixel(d2.y, ndisp);
+
+            uint2* line = (uint2*)(out_image + y * out_step); // out_step is in bytes (out_image is uchar*)
+            line[x >> 1] = res; // NOTE(review): stores both output pixels regardless of width -- same tail caveat as the load
+        }
+    }
+
+
+    void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
+    {
+        dim3 threads(16, 16, 1);
+        dim3 grid(1, 1, 1);
+        grid.x = divUp(src.cols, threads.x << 2); // the 8-bit kernel covers 4 pixels per thread along x
+        grid.y = divUp(src.rows, threads.y);
+
+        drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
+        cudaSafeCall( cudaGetLastError() ); // reports launch errors
+
+        if (stream == 0) // only the default stream is waited on here
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
+    {
+        dim3 threads(32, 8, 1);
+        dim3 grid(1, 1, 1);
+        grid.x = divUp(src.cols, threads.x << 1); // the 16-bit kernel covers 2 pixels per thread along x
+        grid.y = divUp(src.rows, threads.y);
+
+        drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp); // source step passed in short elements because the kernel indexes a short*
+        cudaSafeCall( cudaGetLastError() ); // reports launch errors
+
+        if (stream == 0) // only the default stream is waited on here
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
 }}} // namespace cv { namespace gpu { namespace cudev
 
 
diff --git a/modules/gpu/src/cuda/disp_bilateral_filter.cu b/modules/gpucalib3d/src/cuda/disp_bilateral_filter.cu
similarity index 100%
rename from modules/gpu/src/cuda/disp_bilateral_filter.cu
rename to modules/gpucalib3d/src/cuda/disp_bilateral_filter.cu
diff --git a/modules/gpu/src/cuda/stereobm.cu b/modules/gpucalib3d/src/cuda/stereobm.cu
similarity index 100%
rename from modules/gpu/src/cuda/stereobm.cu
rename to modules/gpucalib3d/src/cuda/stereobm.cu
diff --git a/modules/gpu/src/cuda/stereobp.cu b/modules/gpucalib3d/src/cuda/stereobp.cu
similarity index 100%
rename from modules/gpu/src/cuda/stereobp.cu
rename to modules/gpucalib3d/src/cuda/stereobp.cu
diff --git a/modules/gpu/src/cuda/stereocsbp.cu b/modules/gpucalib3d/src/cuda/stereocsbp.cu
similarity index 100%
rename from modules/gpu/src/cuda/stereocsbp.cu
rename to modules/gpucalib3d/src/cuda/stereocsbp.cu
diff --git a/modules/gpu/src/disparity_bilateral_filter.cpp b/modules/gpucalib3d/src/disparity_bilateral_filter.cpp
similarity index 100%
rename from modules/gpu/src/disparity_bilateral_filter.cpp
rename to modules/gpucalib3d/src/disparity_bilateral_filter.cpp
diff --git a/modules/gpucalib3d/src/precomp.cpp b/modules/gpucalib3d/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpucalib3d/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpucalib3d/src/precomp.hpp b/modules/gpucalib3d/src/precomp.hpp
new file mode 100644
index 000000000..89396fd57
--- /dev/null
+++ b/modules/gpucalib3d/src/precomp.hpp
@@ -0,0 +1,56 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <limits>
+
+#include "opencv2/gpucalib3d.hpp"
+#include "opencv2/gpuarithm.hpp"
+
+#include "opencv2/calib3d.hpp"
+#include "opencv2/imgproc.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/src/stereobm.cpp b/modules/gpucalib3d/src/stereobm.cpp
similarity index 100%
rename from modules/gpu/src/stereobm.cpp
rename to modules/gpucalib3d/src/stereobm.cpp
diff --git a/modules/gpu/src/stereobp.cpp b/modules/gpucalib3d/src/stereobp.cpp
similarity index 100%
rename from modules/gpu/src/stereobp.cpp
rename to modules/gpucalib3d/src/stereobp.cpp
diff --git a/modules/gpu/src/stereocsbp.cpp b/modules/gpucalib3d/src/stereocsbp.cpp
similarity index 100%
rename from modules/gpu/src/stereocsbp.cpp
rename to modules/gpucalib3d/src/stereocsbp.cpp
diff --git a/modules/gpu/test/test_calib3d.cpp b/modules/gpucalib3d/test/test_calib3d.cpp
similarity index 100%
rename from modules/gpu/test/test_calib3d.cpp
rename to modules/gpucalib3d/test/test_calib3d.cpp
diff --git a/modules/gpucalib3d/test/test_main.cpp b/modules/gpucalib3d/test/test_main.cpp
new file mode 100644
index 000000000..eea3d7c00
--- /dev/null
+++ b/modules/gpucalib3d/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpucalib3d/test/test_precomp.cpp b/modules/gpucalib3d/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpucalib3d/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpucalib3d/test/test_precomp.hpp b/modules/gpucalib3d/test/test_precomp.hpp
new file mode 100644
index 000000000..8c53f4786
--- /dev/null
+++ b/modules/gpucalib3d/test/test_precomp.hpp
@@ -0,0 +1,61 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpucalib3d.hpp"
+
+#include "opencv2/calib3d.hpp"
+
+#endif
diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
index d602d0a13..a0b1e3094 100644
--- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
@@ -80,18 +80,6 @@ CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, in
 CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
                                       TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 
-//! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV.
-//! Supported types of input disparity: CV_8U, CV_16S.
-//! Output disparity has CV_8UC4 type in BGRA format (alpha = 255).
-CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null());
-
-//! Reprojects disparity image to 3D space.
-//! Supports CV_8U and CV_16S types of input disparity.
-//! The output is a 3- or 4-channel floating-point matrix.
-//! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map.
-//! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify.
-CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null());
-
 //! converts image from one color space to another
 CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
 
diff --git a/modules/gpuimgproc/src/cuda/imgproc.cu b/modules/gpuimgproc/src/cuda/imgproc.cu
index c6dfbb417..d2d0d0f3c 100644
--- a/modules/gpuimgproc/src/cuda/imgproc.cu
+++ b/modules/gpuimgproc/src/cuda/imgproc.cu
@@ -183,187 +183,6 @@ namespace cv { namespace gpu { namespace cudev
             //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
         }
 
-        /////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
-
-        template <typename T>
-        __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
-        {
-            unsigned int H = ((ndisp-d) * 240)/ndisp;
-
-            unsigned int hi = (H/60) % 6;
-            float f = H/60.f - H/60;
-            float p = V * (1 - S);
-            float q = V * (1 - f * S);
-            float t = V * (1 - (1 - f) * S);
-
-            float3 res;
-
-            if (hi == 0) //R = V,	G = t,	B = p
-            {
-                res.x = p;
-                res.y = t;
-                res.z = V;
-            }
-
-            if (hi == 1) // R = q,	G = V,	B = p
-            {
-                res.x = p;
-                res.y = V;
-                res.z = q;
-            }
-
-            if (hi == 2) // R = p,	G = V,	B = t
-            {
-                res.x = t;
-                res.y = V;
-                res.z = p;
-            }
-
-            if (hi == 3) // R = p,	G = q,	B = V
-            {
-                res.x = V;
-                res.y = q;
-                res.z = p;
-            }
-
-            if (hi == 4) // R = t,	G = p,	B = V
-            {
-                res.x = V;
-                res.y = p;
-                res.z = t;
-            }
-
-            if (hi == 5) // R = V,	G = p,	B = q
-            {
-                res.x = q;
-                res.y = p;
-                res.z = V;
-            }
-            const unsigned int b = (unsigned int)(::max(0.f, ::min(res.x, 1.f)) * 255.f);
-            const unsigned int g = (unsigned int)(::max(0.f, ::min(res.y, 1.f)) * 255.f);
-            const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f);
-            const unsigned int a = 255U;
-
-            return (a << 24) + (r << 16) + (g << 8) + b;
-        }
-
-        __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
-        {
-            const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if(x < width && y < height)
-            {
-                uchar4 d4 = *(uchar4*)(disp + y * disp_step + x);
-
-                uint4 res;
-                res.x = cvtPixel(d4.x, ndisp);
-                res.y = cvtPixel(d4.y, ndisp);
-                res.z = cvtPixel(d4.z, ndisp);
-                res.w = cvtPixel(d4.w, ndisp);
-
-                uint4* line = (uint4*)(out_image + y * out_step);
-                line[x >> 2] = res;
-            }
-        }
-
-        __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
-        {
-            const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if(x < width && y < height)
-            {
-                short2 d2 = *(short2*)(disp + y * disp_step + x);
-
-                uint2 res;
-                res.x = cvtPixel(d2.x, ndisp);
-                res.y = cvtPixel(d2.y, ndisp);
-
-                uint2* line = (uint2*)(out_image + y * out_step);
-                line[x >> 1] = res;
-            }
-        }
-
-
-        void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
-        {
-            dim3 threads(16, 16, 1);
-            dim3 grid(1, 1, 1);
-            grid.x = divUp(src.cols, threads.x << 2);
-            grid.y = divUp(src.rows, threads.y);
-
-            drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
-        {
-            dim3 threads(32, 8, 1);
-            dim3 grid(1, 1, 1);
-            grid.x = divUp(src.cols, threads.x << 1);
-            grid.y = divUp(src.rows, threads.y);
-
-            drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        /////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
-
-        __constant__ float cq[16];
-
-        template <typename T, typename D>
-        __global__ void reprojectImageTo3D(const PtrStepSz<T> disp, PtrStep<D> xyz)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (y >= disp.rows || x >= disp.cols)
-                return;
-
-            const float qx = x * cq[ 0] + y * cq[ 1] + cq[ 3];
-            const float qy = x * cq[ 4] + y * cq[ 5] + cq[ 7];
-            const float qz = x * cq[ 8] + y * cq[ 9] + cq[11];
-            const float qw = x * cq[12] + y * cq[13] + cq[15];
-
-            const T d = disp(y, x);
-
-            const float iW = 1.f / (qw + cq[14] * d);
-
-            D v = VecTraits<D>::all(1.0f);
-            v.x = (qx + cq[2] * d) * iW;
-            v.y = (qy + cq[6] * d) * iW;
-            v.z = (qz + cq[10] * d) * iW;
-
-            xyz(y, x) = v;
-        }
-
-        template <typename T, typename D>
-        void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y));
-
-            cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
-
-            reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-        template void reprojectImageTo3D_gpu<uchar, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-        template void reprojectImageTo3D_gpu<short, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-        template void reprojectImageTo3D_gpu<short, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-
         /////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
 
         texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
diff --git a/modules/gpuimgproc/src/imgproc.cpp b/modules/gpuimgproc/src/imgproc.cpp
index c21a7b837..dabf054b6 100644
--- a/modules/gpuimgproc/src/imgproc.cpp
+++ b/modules/gpuimgproc/src/imgproc.cpp
@@ -49,8 +49,6 @@ using namespace cv::gpu;
 
 void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
 void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
-void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
@@ -157,74 +155,6 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
     meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
 }
 
-////////////////////////////////////////////////////////////////////////
-// drawColorDisp
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
-        void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
-    }
-}}}
-
-namespace
-{
-    template <typename T>
-    void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
-    {
-        using namespace ::cv::gpu::cudev::imgproc;
-
-        dst.create(src.size(), CV_8UC4);
-
-        drawColorDisp_gpu((PtrStepSz<T>)src, dst, ndisp, stream);
-    }
-
-    typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);
-
-    const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};
-}
-
-void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)
-{
-    CV_Assert(src.type() == CV_8U || src.type() == CV_16S);
-
-    drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// reprojectImageTo3D
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        template <typename T, typename D>
-        void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q, int dst_cn, Stream& stream)
-{
-    using namespace cv::gpu::cudev::imgproc;
-
-    typedef void (*func_t)(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-    static const func_t funcs[2][4] =
-    {
-        {reprojectImageTo3D_gpu<uchar, float3>, 0, 0, reprojectImageTo3D_gpu<short, float3>},
-        {reprojectImageTo3D_gpu<uchar, float4>, 0, 0, reprojectImageTo3D_gpu<short, float4>}
-    };
-
-    CV_Assert(disp.type() == CV_8U || disp.type() == CV_16S);
-    CV_Assert(Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4 && Q.isContinuous());
-    CV_Assert(dst_cn == 3 || dst_cn == 4);
-
-    xyz.create(disp.size(), CV_MAKE_TYPE(CV_32F, dst_cn));
-
-    funcs[dst_cn == 4][disp.type()](disp, xyz, Q.ptr<float>(), StreamAccessor::getStream(stream));
-}
-
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpPlaneMaps
 
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index 3cf2391e5..be87bebc7 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -22,6 +22,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuvideo/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpucalib3d/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 9ee7fe8f2..85360f51f 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,8 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo)
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo
+                                     opencv_gpucalib3d)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 

From a6648b537285c529292a6411e7e9b76219400492 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 10 Apr 2013 13:17:15 +0400
Subject: [PATCH 18/49] gpuobjdetect module for object detection

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/doc/gpu.rst                       |   1 -
 modules/gpu/include/opencv2/gpu.hpp           | 117 +-----------
 .../gpunvidia/include/opencv2/gpunvidia.hpp   |   1 +
 .../opencv2/gpunvidia}/NCVBroxOpticalFlow.hpp |   0
 .../src/cuda/NCVBroxOpticalFlow.cu            |   2 +-
 modules/gpuobjdetect/CMakeLists.txt           |   9 +
 modules/gpuobjdetect/doc/gpuobjdetect.rst     |   8 +
 .../doc/object_detection.rst                  |   0
 .../include/opencv2/gpuobjdetect.hpp          | 172 ++++++++++++++++++
 modules/gpuobjdetect/perf/perf_main.cpp       |  47 +++++
 .../perf/perf_objdetect.cpp                   |   0
 modules/gpuobjdetect/perf/perf_precomp.cpp    |  43 +++++
 modules/gpuobjdetect/perf/perf_precomp.hpp    |  65 +++++++
 .../src/cascadeclassifier.cpp                 |   0
 modules/{gpu => gpuobjdetect}/src/cuda/hog.cu |   0
 modules/{gpu => gpuobjdetect}/src/cuda/lbp.cu |   0
 .../{gpu => gpuobjdetect}/src/cuda/lbp.hpp    |   0
 modules/{gpu => gpuobjdetect}/src/hog.cpp     |   0
 modules/gpuobjdetect/src/precomp.cpp          |  43 +++++
 modules/gpuobjdetect/src/precomp.hpp          |  60 ++++++
 modules/gpuobjdetect/test/test_main.cpp       |  45 +++++
 .../test/test_objdetect.cpp                   |   0
 modules/gpuobjdetect/test/test_precomp.cpp    |  43 +++++
 modules/gpuobjdetect/test/test_precomp.hpp    |  63 +++++++
 modules/gpuvideo/src/precomp.hpp              |   1 -
 samples/cpp/CMakeLists.txt                    |   1 +
 samples/gpu/CMakeLists.txt                    |   2 +-
 samples/gpu/opticalflow_nvidia_api.cpp        |   1 -
 29 files changed, 604 insertions(+), 122 deletions(-)
 rename modules/{gpuvideo/include/opencv2/gpuvideo => gpunvidia/include/opencv2/gpunvidia}/NCVBroxOpticalFlow.hpp (100%)
 rename modules/{gpuvideo => gpunvidia}/src/cuda/NCVBroxOpticalFlow.cu (99%)
 create mode 100644 modules/gpuobjdetect/CMakeLists.txt
 create mode 100644 modules/gpuobjdetect/doc/gpuobjdetect.rst
 rename modules/{gpu => gpuobjdetect}/doc/object_detection.rst (100%)
 create mode 100644 modules/gpuobjdetect/include/opencv2/gpuobjdetect.hpp
 create mode 100644 modules/gpuobjdetect/perf/perf_main.cpp
 rename modules/{gpu => gpuobjdetect}/perf/perf_objdetect.cpp (100%)
 create mode 100644 modules/gpuobjdetect/perf/perf_precomp.cpp
 create mode 100644 modules/gpuobjdetect/perf/perf_precomp.hpp
 rename modules/{gpu => gpuobjdetect}/src/cascadeclassifier.cpp (100%)
 rename modules/{gpu => gpuobjdetect}/src/cuda/hog.cu (100%)
 rename modules/{gpu => gpuobjdetect}/src/cuda/lbp.cu (100%)
 rename modules/{gpu => gpuobjdetect}/src/cuda/lbp.hpp (100%)
 rename modules/{gpu => gpuobjdetect}/src/hog.cpp (100%)
 create mode 100644 modules/gpuobjdetect/src/precomp.cpp
 create mode 100644 modules/gpuobjdetect/src/precomp.hpp
 create mode 100644 modules/gpuobjdetect/test/test_main.cpp
 rename modules/{gpu => gpuobjdetect}/test/test_objdetect.cpp (100%)
 create mode 100644 modules/gpuobjdetect/test/test_precomp.cpp
 create mode 100644 modules/gpuobjdetect/test/test_precomp.hpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 95de6789f..55faa397b 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -5,7 +5,7 @@ endif()
 set(the_description "GPU-accelerated Computer Vision")
 
 ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy
-                   opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d
+                   opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d opencv_gpuobjdetect
                    OPTIONAL opencv_gpunvidia)
 
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index bc3b9bdb1..d98f90d3b 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -8,4 +8,3 @@ gpu. GPU-accelerated Computer Vision
     introduction
     initalization_and_information
     data_structures
-    object_detection
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index b3fea3fbf..d6135865c 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -56,6 +56,7 @@
 #include "opencv2/gpufeatures2d.hpp"
 #include "opencv2/gpuvideo.hpp"
 #include "opencv2/gpucalib3d.hpp"
+#include "opencv2/gpuobjdetect.hpp"
 
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect.hpp"
@@ -92,96 +93,7 @@ namespace cv { namespace gpu {
 
 
 
-//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
-struct CV_EXPORTS HOGConfidence
-{
-   double scale;
-   std::vector<Point> locations;
-   std::vector<double> confidences;
-   std::vector<double> part_scores[4];
-};
 
-struct CV_EXPORTS HOGDescriptor
-{
-    enum { DEFAULT_WIN_SIGMA = -1 };
-    enum { DEFAULT_NLEVELS = 64 };
-    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
-
-    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
-                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
-                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
-                  double threshold_L2hys=0.2, bool gamma_correction=true,
-                  int nlevels=DEFAULT_NLEVELS);
-
-    size_t getDescriptorSize() const;
-    size_t getBlockHistogramSize() const;
-
-    void setSVMDetector(const std::vector<float>& detector);
-
-    static std::vector<float> getDefaultPeopleDetector();
-    static std::vector<float> getPeopleDetector48x96();
-    static std::vector<float> getPeopleDetector64x128();
-
-    void detect(const GpuMat& img, std::vector<Point>& found_locations,
-                double hit_threshold=0, Size win_stride=Size(),
-                Size padding=Size());
-
-    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                          double hit_threshold=0, Size win_stride=Size(),
-                          Size padding=Size(), double scale0=1.05,
-                          int group_threshold=2);
-
-    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
-                                                Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
-
-    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                                                                    double hit_threshold, Size win_stride, Size padding,
-                                                                    std::vector<HOGConfidence> &conf_out, int group_threshold);
-
-    void getDescriptors(const GpuMat& img, Size win_stride,
-                        GpuMat& descriptors,
-                        int descr_format=DESCR_FORMAT_COL_BY_COL);
-
-    Size win_size;
-    Size block_size;
-    Size block_stride;
-    Size cell_size;
-    int nbins;
-    double win_sigma;
-    double threshold_L2hys;
-    bool gamma_correction;
-    int nlevels;
-
-protected:
-    void computeBlockHistograms(const GpuMat& img);
-    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
-
-    double getWinSigma() const;
-    bool checkDetectorSize() const;
-
-    static int numPartsWithin(int size, int part_size, int stride);
-    static Size numPartsWithin(Size size, Size part_size, Size stride);
-
-    // Coefficients of the separating plane
-    float free_coef;
-    GpuMat detector;
-
-    // Results of the last classification step
-    GpuMat labels, labels_buf;
-    Mat labels_host;
-
-    // Results of the last histogram evaluation step
-    GpuMat block_hists, block_hists_buf;
-
-    // Gradients conputation results
-    GpuMat grad, qangle, grad_buf, qangle_buf;
-
-    // returns subbuffer with required size, reallocates buffer if nessesary.
-    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
-    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
-
-    std::vector<GpuMat> image_scales;
-};
 
 
 ////////////////////////////////// BruteForceMatcher //////////////////////////////////
@@ -213,34 +125,7 @@ public:
 };
 
 ////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
-// The cascade classifier class for object detection: supports old haar and new lbp xlm formats and nvbin for haar cascades olny.
-class CV_EXPORTS CascadeClassifier_GPU
-{
-public:
-    CascadeClassifier_GPU();
-    CascadeClassifier_GPU(const String& filename);
-    ~CascadeClassifier_GPU();
 
-    bool empty() const;
-    bool load(const String& filename);
-    void release();
-
-    /* returns number of detected objects */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
-
-    bool findLargestObject;
-    bool visualizeInPlace;
-
-    Size getClassifierSize() const;
-
-private:
-    struct CascadeClassifierImpl;
-    CascadeClassifierImpl* impl;
-    struct HaarCascade;
-    struct LbpCascade;
-    friend class CascadeClassifier_GPU_LBP;
-};
 
 ////////////////////////////////// FAST //////////////////////////////////////////
 
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia.hpp b/modules/gpunvidia/include/opencv2/gpunvidia.hpp
index 4c07417a8..47555f890 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia.hpp
+++ b/modules/gpunvidia/include/opencv2/gpunvidia.hpp
@@ -47,5 +47,6 @@
 #include "opencv2/gpunvidia/NPP_staging.hpp"
 #include "opencv2/gpunvidia/NCVPyramid.hpp"
 #include "opencv2/gpunvidia/NCVHaarObjectDetection.hpp"
+#include "opencv2/gpunvidia/NCVBroxOpticalFlow.hpp"
 
 #endif /* __OPENCV_GPUNVIDIA_HPP__ */
diff --git a/modules/gpuvideo/include/opencv2/gpuvideo/NCVBroxOpticalFlow.hpp b/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
similarity index 100%
rename from modules/gpuvideo/include/opencv2/gpuvideo/NCVBroxOpticalFlow.hpp
rename to modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
diff --git a/modules/gpuvideo/src/cuda/NCVBroxOpticalFlow.cu b/modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
similarity index 99%
rename from modules/gpuvideo/src/cuda/NCVBroxOpticalFlow.cu
rename to modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
index 427d4fa20..4faba6331 100644
--- a/modules/gpuvideo/src/cuda/NCVBroxOpticalFlow.cu
+++ b/modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
@@ -64,7 +64,7 @@
 #include "opencv2/core/cuda/utility.hpp"
 
 #include "opencv2/gpunvidia/NPP_staging.hpp"
-#include "opencv2/gpuvideo/NCVBroxOpticalFlow.hpp"
+#include "opencv2/gpunvidia/NCVBroxOpticalFlow.hpp"
 
 
 typedef NCVVectorAlloc<Ncv32f> FloatVector;
diff --git a/modules/gpuobjdetect/CMakeLists.txt b/modules/gpuobjdetect/CMakeLists.txt
new file mode 100644
index 000000000..745c02b5b
--- /dev/null
+++ b/modules/gpuobjdetect/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpuobjdetect)
+endif()
+
+set(the_description "GPU-accelerated Object Detection")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+# gpuarithm is a required dependency: src/precomp.hpp includes opencv2/gpuarithm.hpp unconditionally.
+ocv_define_module(gpuobjdetect opencv_objdetect opencv_gpuimgproc opencv_gpuarithm OPTIONAL opencv_gpunvidia)
diff --git a/modules/gpuobjdetect/doc/gpuobjdetect.rst b/modules/gpuobjdetect/doc/gpuobjdetect.rst
new file mode 100644
index 000000000..c53225d2d
--- /dev/null
+++ b/modules/gpuobjdetect/doc/gpuobjdetect.rst
@@ -0,0 +1,8 @@
+**********************************************
+gpuobjdetect. GPU-accelerated Object Detection
+**********************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    object_detection
diff --git a/modules/gpu/doc/object_detection.rst b/modules/gpuobjdetect/doc/object_detection.rst
similarity index 100%
rename from modules/gpu/doc/object_detection.rst
rename to modules/gpuobjdetect/doc/object_detection.rst
diff --git a/modules/gpuobjdetect/include/opencv2/gpuobjdetect.hpp b/modules/gpuobjdetect/include/opencv2/gpuobjdetect.hpp
new file mode 100644
index 000000000..ab665b3b7
--- /dev/null
+++ b/modules/gpuobjdetect/include/opencv2/gpuobjdetect.hpp
@@ -0,0 +1,172 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUOBJDETECT_HPP__
+#define __OPENCV_GPUOBJDETECT_HPP__
+
+#include "opencv2/core/gpumat.hpp"
+
+namespace cv { namespace gpu {
+
+//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+struct CV_EXPORTS HOGConfidence // element type of conf_out in HOGDescriptor::computeConfidenceMultiScale
+{
+   double scale;
+   std::vector<Point> locations;
+   std::vector<double> confidences; // presumably one entry per element of locations - TODO confirm
+   std::vector<double> part_scores[4]; // fixed array of four score vectors
+};
+
+struct CV_EXPORTS HOGDescriptor
+{
+    enum { DEFAULT_WIN_SIGMA = -1 };
+    enum { DEFAULT_NLEVELS = 64 };
+    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; // values accepted by getDescriptors' descr_format
+
+    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
+                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
+                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
+                  double threshold_L2hys=0.2, bool gamma_correction=true,
+                  int nlevels=DEFAULT_NLEVELS);
+
+    size_t getDescriptorSize() const;
+    size_t getBlockHistogramSize() const;
+
+    void setSVMDetector(const std::vector<float>& detector);
+
+    static std::vector<float> getDefaultPeopleDetector();
+    static std::vector<float> getPeopleDetector48x96();
+    static std::vector<float> getPeopleDetector64x128();
+
+    void detect(const GpuMat& img, std::vector<Point>& found_locations,
+                double hit_threshold=0, Size win_stride=Size(),
+                Size padding=Size());
+
+    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+                          double hit_threshold=0, Size win_stride=Size(),
+                          Size padding=Size(), double scale0=1.05,
+                          int group_threshold=2);
+
+    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
+                                                Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
+
+    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+                                                                    double hit_threshold, Size win_stride, Size padding,
+                                                                    std::vector<HOGConfidence> &conf_out, int group_threshold);
+
+    void getDescriptors(const GpuMat& img, Size win_stride,
+                        GpuMat& descriptors,
+                        int descr_format=DESCR_FORMAT_COL_BY_COL);
+
+    Size win_size;
+    Size block_size;
+    Size block_stride;
+    Size cell_size;
+    int nbins;
+    double win_sigma;
+    double threshold_L2hys;
+    bool gamma_correction;
+    int nlevels;
+
+protected:
+    void computeBlockHistograms(const GpuMat& img);
+    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
+
+    double getWinSigma() const;
+    bool checkDetectorSize() const;
+
+    static int numPartsWithin(int size, int part_size, int stride);
+    static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+    // Coefficients of the separating plane
+    float free_coef;
+    GpuMat detector;
+
+    // Results of the last classification step
+    GpuMat labels, labels_buf;
+    Mat labels_host;
+
+    // Results of the last histogram evaluation step
+    GpuMat block_hists, block_hists_buf;
+
+    // Gradient computation results
+    GpuMat grad, qangle, grad_buf, qangle_buf;
+
+    // Returns a sub-buffer of the required size; reallocates the buffer if necessary.
+    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
+    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
+
+    std::vector<GpuMat> image_scales;
+};
+
+// The cascade classifier class for object detection: supports old haar and new lbp xml formats, and nvbin for haar cascades only.
+class CV_EXPORTS CascadeClassifier_GPU
+{
+public:
+    CascadeClassifier_GPU();
+    CascadeClassifier_GPU(const String& filename);
+    ~CascadeClassifier_GPU();
+
+    bool empty() const;
+    bool load(const String& filename);
+    void release();
+
+    /* returns number of detected objects */
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
+
+    bool findLargestObject;
+    bool visualizeInPlace;
+
+    Size getClassifierSize() const;
+
+private:
+    struct CascadeClassifierImpl;
+    CascadeClassifierImpl* impl; // NOTE(review): raw pointer and no copy constructor/assignment declared here - confirm copying is safe
+    struct HaarCascade;
+    struct LbpCascade;
+    friend class CascadeClassifier_GPU_LBP;
+};
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUOBJDETECT_HPP__ */
diff --git a/modules/gpuobjdetect/perf/perf_main.cpp b/modules/gpuobjdetect/perf/perf_main.cpp
new file mode 100644
index 000000000..b35791cda
--- /dev/null
+++ b/modules/gpuobjdetect/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+// The first macro argument is the module name; it must name this module (gpuobjdetect), not gpuarithm.
+CV_PERF_TEST_MAIN(gpuobjdetect, printCudaInfo())
diff --git a/modules/gpu/perf/perf_objdetect.cpp b/modules/gpuobjdetect/perf/perf_objdetect.cpp
similarity index 100%
rename from modules/gpu/perf/perf_objdetect.cpp
rename to modules/gpuobjdetect/perf/perf_objdetect.cpp
diff --git a/modules/gpuobjdetect/perf/perf_precomp.cpp b/modules/gpuobjdetect/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpuobjdetect/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpuobjdetect/perf/perf_precomp.hpp b/modules/gpuobjdetect/perf/perf_precomp.hpp
new file mode 100644
index 000000000..2a1acac7b
--- /dev/null
+++ b/modules/gpuobjdetect/perf/perf_precomp.hpp
@@ -0,0 +1,65 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpuobjdetect.hpp"
+
+#include "opencv2/objdetect.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpuobjdetect/src/cascadeclassifier.cpp
similarity index 100%
rename from modules/gpu/src/cascadeclassifier.cpp
rename to modules/gpuobjdetect/src/cascadeclassifier.cpp
diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpuobjdetect/src/cuda/hog.cu
similarity index 100%
rename from modules/gpu/src/cuda/hog.cu
rename to modules/gpuobjdetect/src/cuda/hog.cu
diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpuobjdetect/src/cuda/lbp.cu
similarity index 100%
rename from modules/gpu/src/cuda/lbp.cu
rename to modules/gpuobjdetect/src/cuda/lbp.cu
diff --git a/modules/gpu/src/cuda/lbp.hpp b/modules/gpuobjdetect/src/cuda/lbp.hpp
similarity index 100%
rename from modules/gpu/src/cuda/lbp.hpp
rename to modules/gpuobjdetect/src/cuda/lbp.hpp
diff --git a/modules/gpu/src/hog.cpp b/modules/gpuobjdetect/src/hog.cpp
similarity index 100%
rename from modules/gpu/src/hog.cpp
rename to modules/gpuobjdetect/src/hog.cpp
diff --git a/modules/gpuobjdetect/src/precomp.cpp b/modules/gpuobjdetect/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpuobjdetect/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpuobjdetect/src/precomp.hpp b/modules/gpuobjdetect/src/precomp.hpp
new file mode 100644
index 000000000..47058499d
--- /dev/null
+++ b/modules/gpuobjdetect/src/precomp.hpp
@@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include "opencv2/gpuobjdetect.hpp"
+#include "opencv2/gpuimgproc.hpp"
+#include "opencv2/gpuarithm.hpp"
+
+#include "opencv2/objdetect.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUNVIDIA
+#  include "opencv2/gpunvidia/private.hpp"
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuobjdetect/test/test_main.cpp b/modules/gpuobjdetect/test/test_main.cpp
new file mode 100644
index 000000000..eea3d7c00
--- /dev/null
+++ b/modules/gpuobjdetect/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpu/test/test_objdetect.cpp b/modules/gpuobjdetect/test/test_objdetect.cpp
similarity index 100%
rename from modules/gpu/test/test_objdetect.cpp
rename to modules/gpuobjdetect/test/test_objdetect.cpp
diff --git a/modules/gpuobjdetect/test/test_precomp.cpp b/modules/gpuobjdetect/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpuobjdetect/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpuobjdetect/test/test_precomp.hpp b/modules/gpuobjdetect/test/test_precomp.hpp
new file mode 100644
index 000000000..13527ba06
--- /dev/null
+++ b/modules/gpuobjdetect/test/test_precomp.hpp
@@ -0,0 +1,63 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include <fstream>
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpuobjdetect.hpp"
+
+#include "opencv2/objdetect.hpp"
+
+#endif
diff --git a/modules/gpuvideo/src/precomp.hpp b/modules/gpuvideo/src/precomp.hpp
index e105817a1..276eb22fc 100644
--- a/modules/gpuvideo/src/precomp.hpp
+++ b/modules/gpuvideo/src/precomp.hpp
@@ -59,7 +59,6 @@
 
 #ifdef HAVE_OPENCV_GPUNVIDIA
 #  include "opencv2/gpunvidia/private.hpp"
-#  include "opencv2/gpuvideo/NCVBroxOpticalFlow.hpp"
 #endif
 
 #ifdef HAVE_CUDA
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index be87bebc7..e90bcb6ac 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -23,6 +23,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuvideo/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpucalib3d/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuobjdetect/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 85360f51f..3bf506932 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpuobjdetect
                                      opencv_gpucalib3d)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
diff --git a/samples/gpu/opticalflow_nvidia_api.cpp b/samples/gpu/opticalflow_nvidia_api.cpp
index 3c11029d1..e4fc93cd5 100644
--- a/samples/gpu/opticalflow_nvidia_api.cpp
+++ b/samples/gpu/opticalflow_nvidia_api.cpp
@@ -17,7 +17,6 @@
 
 #ifdef HAVE_CUDA
 #include "opencv2/gpunvidia.hpp"
-#include "opencv2/gpuvideo/NCVBroxOpticalFlow.hpp"
 #endif
 
 #if !defined(HAVE_CUDA)

From 96ac27e68c48d8e3ba5eafb0b361583e35a6399e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 17:52:32 +0400
Subject: [PATCH 19/49] removed nv_perf_test (it is a duplicate for perf4au)

---
 modules/gpu/app/nv_perf_test/CMakeLists.txt   |  10 -
 modules/gpu/app/nv_perf_test/im1_1280x800.jpg | Bin 143316 -> 0 bytes
 modules/gpu/app/nv_perf_test/im2_1280x800.jpg | Bin 143366 -> 0 bytes
 modules/gpu/app/nv_perf_test/main.cpp         | 486 ------------------
 4 files changed, 496 deletions(-)
 delete mode 100644 modules/gpu/app/nv_perf_test/CMakeLists.txt
 delete mode 100644 modules/gpu/app/nv_perf_test/im1_1280x800.jpg
 delete mode 100644 modules/gpu/app/nv_perf_test/im2_1280x800.jpg
 delete mode 100644 modules/gpu/app/nv_perf_test/main.cpp

diff --git a/modules/gpu/app/nv_perf_test/CMakeLists.txt b/modules/gpu/app/nv_perf_test/CMakeLists.txt
deleted file mode 100644
index c13f5ef46..000000000
--- a/modules/gpu/app/nv_perf_test/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-cmake_minimum_required(VERSION 2.8.3)
-
-project(nv_perf_test)
-
-find_package(OpenCV REQUIRED)
-include_directories(${OpenCV_INCLUDE_DIR})
-
-add_executable(${PROJECT_NAME} main.cpp)
-
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
diff --git a/modules/gpu/app/nv_perf_test/im1_1280x800.jpg b/modules/gpu/app/nv_perf_test/im1_1280x800.jpg
deleted file mode 100644
index bdbbd4aee95128fe0d296a50800c9cb95f31f568..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 143316
zcmb5VWl$X76E3{C`!4S8!4~(x;<oJK5P}7FcMmM?VIjCX1Sb#(!Lz{~5`u;e@Czh(
z2$%m`^;X@l_nfNv(x-d6re@B0x}Rqr*B*BO<k}D|2mk{E1EBFV0gs!2=KvfmY;0^S
zoTm#84h}9JF+SeYAR{6oAf_Orq@*CDprE2=q@e=RgD5Cy*=XsRm{?d?sA$+Z*_k;R
znOT_s7X;(U6%QAW6d#|I8At(S{(l~i!vG3=ELI!<76v;2lL7;a0^{)$fC&J=z<J6R
z1MvS3Yz#~+99#e%KEab&g&cs1frX9nzlnu~g^5A(WCdVT;83!Os^C(w8@<E>iurJ;
z#$<sw%f+E~_!8~ZG+e&1+}R`G=f?BjpF%KT05Gxs?;8LxFacQDPtW1WC`JLm#QguF
zKgGw!`d=0pm=svp{|k@vzwkgYRUcds2UJ`lru=zU`~2eufcR<I(;5nZ5<u1&aijnb
zm}a_Rg4g@-pUN9vsAMpj7DdIqN9>X+OROrNP{}{c8*6M6j^zD@{u_NDBFsF9tlRXT
zh?tueP2@px^9cK`@V&s${}{XPr+I?81NEY9cn1G@)i0EzCy;h?)8fjxS*1UKzZ#I@
zA&feFJIv}WFdFd!ZQ1O4`?^<6`Bz`vpa$A<(CJ`iQkClCGKYs29@DCyM#a@X<C8<z
z0LvlKAL+KC6@Q#myv27DbGSM*So|S{ZC04-0Wa5Q9Z_QypQ|R~U_F6QVJ#+;#MM1+
zmA%ROmYtX#PLZu}_!s`@RNL>}dm#Htw5hE1pP;~OF<Hi6VfC%<(=R{59g4NG1?0bf
zPcM~>Ft{`Nc^_qHRz3Ks+%wlUHmp^{Rx<HF@UO$<CTkb^)c?7PJ-N2lM`=fdIimFx
z*pcd%RkV&g{kyK6Ur1Lqo|9T{iO)qdK*%J1iAa%M*VvYrw*2gv$TA~g!^cW>%3;u7
zoUD1C>340MmAi*zW%v?&uD_l&0H%}^zyZq)W5oPJlVb+A&>vVc?8*Usg-uJ2x%NGu
zYo%i_-PWl}(t7d{z8xueK<i@e*78#LDYNd~P1VL+@e$zr%GA~{>^tL5PG_$xm`W1t
zHRL7g<H3p`BK(FB_6K8!Y-u?1>BE|8DKWx!+)6N%;eH=<X>bXEyf9dFII8tA$YAWy
zaaZBvZG6en@b!acS%Kan=hptqH^HsN0ps~@ud^Y7pMAB!btT^qbEkUyJf|8qXIKJ_
zS#1P9)GlZMqj<z6gQRZ6>RmtWEE#{S=ib^`KA98CnSdMlmha#CpkDCK>t$^wQw&Z{
z;e=qDc15QCtLTt*u_%7;pE~~DtRP04Z@R7S^uXE4oWr-NmK8BoO6)sc=qcFB&>1n~
z#jlU}u~ScnZM>i2GZy^%$2T-<rbe}^Z}<fMr`6sOem(k*Och4@?~dc=Gj6iKCTp6p
zraUn{T`AS!oOM%;e9`u?zPvI^b<!Z?zto>_QgFAICB@9!Ts2ft+0wQA{L$(rGV=H|
z_Z3h_evpO)y(WRcVNI%s4DMS|3$o54d)sW+k21iU-TH{nlIm1mzEV54HSLVgJu@|>
zY0H~EZ5CA5zBngvCDGj=9hca~mucD4hUWMbX_>Z19VYQOf9qh6<RDWaru?Jay?n+K
zxVgS|esk4n_>nR-<BG~y8U1`5BU_}-(XMfq$U?)4Bl!r}9510kfGL^@x~binjovj(
zs+8tVBNTquxamUe6%hE{2olozM#)Uc=$XRv(5w6oi9lCxfWrK@2Il_vt7jYdR@EKF
z0J4B1sqTMhuL(uB`s%qyfXtf!`qn+l0zuRi$|ls}-*3Go`~I%1yv(CG_)m|nz6v(Q
zrN#{K{M91tGryjk;?M*0gKEzuPOSO%B=s?_jKXRyAIbFU#|`e*fJ#^LZ41rm;aHy+
z@PFBNXERIv{@yz5%?qjI1_2YxE;dqbm6dT~*|qCGT%u)aQ{zg3siLXSoDu;RNL<l@
zQMWQHVen*&#{?pkgM*VS&+i%1=T2dvmT8WSz$&<E0+}}<b6*4E7^)M^0=B<~ie|x9
zug14~c&Cv=Sd%q+^!=F-b^Xa+@m@oWaXodOi%~`_$cxQ4jKzN=(f!6BDmCYhYaln*
z-mejFcmwtclgS+%7A8+#^NaCX6Z?%aD=I8PF=7os@&MU0c`9wRU#FiMwFJorX0+xw
zsY*3rB|femMhm*TA6&Gxt=Rx{zznO=z5}%@Ja01pu|gIS#;c~1l{3})<`H20!LW7k
zd3|0_US${L1`t-M1b=uLf|lz}m!T8N-QiVOEmd*W*CCFv_K-|y&HYSw`C^=>|KID#
zsu^dy#V$Ur=g|b<KX22Pq)h7Ng_&p_C|+Q#%g^;s(eayB2z-lDi!vJKt9BJTpL3Ex
z4sy@thH?|%4?TPL1I(;azi0c6Ql_lgNz0xzmF~$Ex}P`+o&}4U8y>ME@fkzvE4*jo
zw#78hQRGqf%2PERlwy37;jqFz5Z0U&$57)NOkX`M{;b_f4wR6gyF+_ZlF<@51RtFK
z{?A}~Y2furAi6r;60<x0Rf9tkleWkt_}K_epxp>GbWCj;AjrRH+V1vB3?f$h2mqPg
zz|ZQkE7hzk_!x1f&f>XJoJzgT`<RR&yM-@+$(DjYDlc>Plm{j!J`TA?#9rB#1kvC5
zt>~2M+KBrGntrAcB-ww%f<7FG%<j$ld`X5`yk82O2!g%bK_I16H_0Gx>on*=8$pC;
zis?rB-c_wR79=Ej@E}qRxGFlzJLnlR|B<eiNR#Yc)9uTCA+arJJ-55{j3oyv%I416
zfzylR0nVM+5xM0>2UuBL%vMc#UpLuWe3xhEi`c)W=ktnn?+&Z#Qt2bRpEn7jz9wcG
zF6Vmbmm~I4-p6>|TvhYx26W^IpIXsk9T*7c-8_{Xxi%iPrDToR>XJNw@mFc=^XqsI
zhnI5K`Q9uLo{j6&aTO;M?FoAGdKqc?adl792!4b0H>U;}FqZy&UOa#0lMQ%TOa@k@
zs1IS1+7Q1BNLZqFtl=v9vD}h_58GY|t8Cd3q98KbD1%d!kZ3vi>1GE_;?@GPbqAQb
z6yh<ZJ4jV3TTYgS%@!>9-zei|?ZZ_(^;v63P`zaNuBML3QJ(`BJ%!b1Fu`<xm!%p4
z2yBx)Qr}VSBI0Wn)W(Nq4$>UsJ!F6x?(?n8uC2-?hi<kDj=?{cOfB}?&;rhe#ebZe
z#Ct$HX?~-EmU<eB2h)j6zm1Zb9X>mk`&_EC1Ujp{nqV@F)}T`oEj>#-2CKlsvqv34
zz0W=<ZL57W=V6}Wu9Cd4!dd6Wk=-ehzNkGw&_u*Y77G6R)x&RjfN1SbcNFCOLU5M&
zZs`?L)yX(IxUsG}-2n?F{yyA@(M-&8B5m8L5Y&c<nE7jwJfO=<I7|pSQ+?hOJc-Ve
z(d6T#^KojWp#9?7YS0<nvL$s6dR6^ZVxA?ad>Bl%rOvd<?6P$LQdK39Oz1cJn7xL@
z?Dnh)J>x1_r*MvVze=qYP0<bXgSWfPZMEJibFzRVnUbxd^L%M4t!KH)vTguQrS|n>
zVFDAW9mRgy?PI_&Jgg>I;7X$xUq07}gd*t;cFSq@x8d1Y)z=R*n-e_@^#KAk2zJ!&
zfI<F-v%<RNKNo0#J<kLi#iyKcW?js1n0OdrL>Gq=B7N7NvkEWhQaN(PvGl8;4jFJh
z<+PuR`<6(uL*T%aFihf@!?1T47|=XDg}cgN$?Q5{oM(6q74w+mtHBBa6b^5gIkm8M
z89Al~qYG<Lp6!X5#3k?a(@#;0br}@%xz($4j<fhJ>eM}SMB5}y;S;@KjbZH!llYj8
z0S}p**}->>J~RF~d^ui;&<4#t=^bOw)`<A^9+{dAs{+M!>A#Cdu|oIB#BKq`w*-+G
zdwHC;TAR=<8{IhaiQb4MrwvJO*lyB`*aX@!n(mL8GlEt}+uw)kJ?+ghcVH;`@qxXQ
zWslR(BR<_##yLA;WhrLsldS&EC@t*#avx;#;(zq;tDN0!_|UB9H%dvfxhbFQHXp7L
zV@6B6anbZTlW%9IM#zrAJl1Y8u*q={uGZ%!k*;tCmv(7gB!e#}C1?a_RgkF)$R#ep
zQ6ljs>q=e1Ge~{As=*lm?=HHfhL+`bTlNx|jBC)=oi?j?{7wJwtQhKS_Xy~jVu4LA
z>_&3QxyZDzU_JNB@qIJt1rlh>^Z>FCU?nmd_mBsp2SfHCS`^zT;`#k76C+;7Tjt|5
zPdtEA4B<{S!6Zrs3j-;v8BL2K;p5=v&1|^j9>7hr{XM-F9<H(8N^q$-7|{In{X5OT
zSG#?!9+x)p$d%%G>D!od@OO)|44)cT66{H3ytr#?CH-`Pp7j)SzSHVCN}fr|;9Bj?
zB4!+Lk{zBhEDKV_*e$voI6q?_`%XRY9J=nHOE9i3Df!8P^tioybuUN1*H&U>bf0dh
z-i*2rixFl5W!XsdbWXzS`v;NpS$1yMYROV4s}7?7f|ji1Oc-<C=b!48Bq0&%Ex-%9
z){<UZg@~y;_+=sF7W*#`UpGtyxd^a92>{pXZh;Lgl7tn#sr8@?K_O$o6t~=nlsn6x
zMFubzpEzC`{xY#6<W}N_w+t#g=+4{&EnuxG_aNh728;csX1eq>ccpTvDbIn;69^c7
zSj96+Q?+7Z6!(K<mf!)ayJONDB)A|J)q+i;h_YV4vG;fuRv=mh<8sAscsGmNI>fE)
z<@kBteqZ>;lyT~%D9&5<>Eo#~_W6RSMru8d6bS8!i|cn|NY}zwUojaEfX2MNUCDhj
zO)C14(zY&~wunkjq!#UX)9CP2E8c_C1ElxbVgApR^Z95GY%cx@@@no^G$%&ZMOF*u
zXjL8izM>=DS<Tw&d^aH1eLBIpEa9i?`8j>8FoP4>9aH2r$!J=>;8KZ@7?3G^C!g`l
z!0xrTRTK(1fn775B*N(-t>mw^mc0CX0I^{RxBld?B?-V=egzh=Q`D9PzYEvY0+1}5
zH@S4^VQJwL;yGoz#d1&ykb7?-ZIxKpc5D0M8ySm>mtJ`r;?p+$5pg#i`jj=}m77KC
z`-A}?QAU)PRJu!5n1&+N83DgLT7dN2<7Z)VxxF#dvW&4LPD?H;X8&}49R`2fxz8YC
z$5j<^m3*D<_E||I%Fu54l*(WzSLkCZra=JO_G}cZv|;B9DP>K-cP=hxYBOc&a-_m*
z?q){Z7umnBpUd0sQ@}gT-cTr;vcIL)aH_icqB)KurqUyQSWrH|{Oz^{2bRbKV)V33
zZ?`%RdwZ$-Hbp(Jb2DjVvQat%D~R>BLC*W7p$7P+n<ytPx)6V!>$3amYhnVvXx!h1
zG0RjI1^QUCNzT2}{&-yktyBO6|D(nvN^CneJxqeRt7-FCGQ-FBJ6r?ntm0rEXZh07
znt!=P*or?UQ+zx8_^$S+&ReI`z2tR|rC%6lsShfbc`JRv=v3&oJ`ii${#o<bSWjPi
zV^8Irhvm`w?2>C#s_w1uZ%h@LN^FPJ9eZ>Mhvrq=eg;VLZ=1*aQqT&b%I}2GQYmDd
z#wi@xk}V1vUl{>h2^jg(JE&14aJ0ih-!^JUYVSa`C>Y1^C(+G=d@Wy{Lvu~Lv&Bx_
zGsyz98?c{qW{mlJN}g_yG#r7rbc3hHHWX(cL@rUK4gOCcakAn7Lg%;MTv3|u_WhfB
zX!iA@MU|;ZNB1z~dnxF0_4vQWra`0133<Vx8>2oyD<=K7yW#%+zYdHcZH#eRQ_?L{
zSih~}yBwY|!YIB;%>LIOeD-GNh`o|s+MaPiEb9RpGbV4_3dUDi=F%LUK-l|?;fk^M
zCvdAaNrRCCU12VhN~2@02`VA3IpgpT8?PPoQ*#pKTcv9?a{dlfv<TU(sM3b}jY;4*
zVQ5@ysduyUOyblRG4ehF{B-xFMjy~0mO{4^lWV2|g4kQ;hi5q#TdO$k<zB1^{&AD)
z@6UtrPGVobkiUBbtVl&xe2BUL_23bf-INVR{FM6ub>$==v`qY12UcNf>@%#D0dLIx
zxcYmv{$&a6wMTJ?Z-e`6>f8{5u(xd-Y3vi?KyY4*RWc%R4STmq0g3|cnn&wK{FBlz
zKwJr*^H=#KJdH+XfyTo-UhaM>U)6qWMo(V2@TAYd8Y_><<};R@JuCG1?_Rv2B_1EF
zkTQYWsu)j#e|hFbNxt<j5a~0)lc&E6`{T8G?nZ62_H`0_7(f6XUOxU`(NY^HhOAxx
zslXVg1{oPAA(Wkz!9ywd;=0j55|gV_)72ql@=No*jw5G9oFV&0I1foy?uT!NL0bYD
z9Kc@~Rxn-GeLpLRs}{#F3i4rF2uF->SIeGvCew|l*=(?>&?p!Djy-X(M<>Zbl4;I$
zZr=a1+;}6I?O~<5;ecPP^UoPNAs*XNzMZ*d{Hz$fEZ9t<&D)B&Rfn<Tu+M(Etp#Vw
z;+h&t&p?+fXv~B^&4>Rz<XwF3rGWx@aQa$Ss{=TST7QA;qKkK;1_Mq)yu7nIp{fZ~
zKAt2>#sQLOgy@x9*UlmE_sp5r`MCuX>49CEZe!N&yJpH{0C7m;rZ?p_o&#bwpU}r8
ze=jGc)9YlK3GAdx@|EAx<N4L+vSp=OZZUIO+TXh^?tgOFh|Stj;n&s(>5FM`g>FPx
zyL#oz>aEDLIg;Ml91VP@M4@>R9(3%xj%*SoZ+CL(x|3F66YCD15&_h-T+!zIYKA(h
zL)&yK^6y;ER2_I_ZC7#bWVcC3;TUf!63q6VPr8R-qC`Mw@qoH@WTlA-0>Qs;<Pyav
zkj4YbXIrXc$=E2PU~AtZPo^=X@i4R0r!0N|o6_~3w#y1iPnjQ3E9+iI2fRR6O$p5p
z{+<RGihQf&F~KxgSYrzI8R{*rHbgYVL#6rudB9zIj<!U}HNb8sVVNg9SRiXqX;;A9
zxQjjQK)Zbw=iL1g*QaP$`v_<nOAL1+4l+Y~<qi0UrZwKgnc9ubu_g!<Oa^Kt=tjX<
z9u&B9bqyW-^~8>h-~+Hgt5;3SDX!G>ADM<YejCZ~*!P0T%~v<9#3{yc;KC{5(~Cuk
zbo+c@{|oiN3N?(^sfIn6zB8TVI1)U(zmneMKLYyWv5f|pU0p^3NW}IL><kHsm=0F9
z#+0!4ETkCKTLiu*%v`ZA6X3M<Mo^JZ+WHSJj5?Xq;<x2zi=pl|25a>W1OCxx9Bu`>
zs=x?(dbCgjg~~sah2=+fmG`fJn{mX=oq>G0KjUSsk{t;wtt;QdBNjTJ<rE*boYcmM
zqz}_)z|z@^lCTX^`}SeSapN(kqs+eiL$9u!KAxM*gRXoBF_xXcmdQv{{`YZX)(M{e
z0x8>$)m7&x$<+yv!wlVb%`pdmGtaaUq@A(?;WN|Tq3&KSp`6eV;Kj(rMj?-0{%T;@
z_|ZNYPy|7jI~pDLg}0F`6ul<@+J&RRA^%5uq}2lVDhZ+<i^%!(CiUpI5odPc1q5b<
zNuX9Lm{<3o@XH=RNUi=Ofc3ej`3$Ba2MWgrBir_tOWksFxWMoz$Ul4io+4*}J=NE^
zl`}lh*p+}o75fFF(VcSM>BYky2-hY=q07c~QNPE{x%;y+ZtCIHTyP_Q%fpYeC9HFl
zxOI#t2NYFI28^ty!Opk>n~i<qvP8aPIIifEQwL^My~09-8wsXl|0%%dEOM6EEVIoZ
znqu#{g1uEFP;-_cS(aTk#9T0>7T00O5!+{O2urTYwUZdrQq_ssn<2xoA}@Xfz|EtG
z^7n0xER^%91@~AN|Km(o)mP{Aq^u%<tee+v#sm-OlbEAt1z&oL8y^3pN1bK!j~W_D
z$Q1xz$sya8*(+`%?iB_f=3J;W-0d>}Ste|nsrBU)ocU0ia?_8ndV=m-dkL=o<YKws
zCH&LD)wvExJ2iIQ92ALgHK#BBJQ4A4E$$2LU2|p%M-S0Nd97uQ02!Zu6xrcRS@^Zl
zp6;*4Ag7>)JsA*G`_?MwPGU;(JHk#&r1TcE6h<?6ni{8O-hJMe5pZI(w{DNc_#4GQ
z_SpwMxhwdpc)p#AnPH?(fJKnaK+!qG7O!8Lg17D{_4XW^?5KO5YeNP?UTV~O1f-aL
zjoZfBU2e-~g^<A*=i}uAAY$Huu?yKX;RlV!ImZRFUh<CK!JI+x3y3ABzrgmO%ke;j
z(-fkNs%~fZy(#Qn-=Y`u#&^P<+ut^ES`FrK#V?AL`sV}ISe6G619@X3^P&hDCSPUR
zTCcqr%g@y-g%YZ)O@23x48hA&3K4|03*@rxSlmrQB`U(W!Hs)!1V^cq(_(x=x%rds
zc2zIJ{JgM`ggSZAZAv#4+rnDU1AHj|zU><`_z|W4HMG=NFRQJa<iZ{P2&f!Xde-nC
z8>GjvJL6VTt~K({c#LEDdh=kFb{VNA(<+B~quGr$vdz}-Ut;L1#e7*v4pXjo3+7I7
z5d5yYNV3rPnfnE<uX1y23_^>|+^{iPrG>a^|Mab9HApjMG4zu~T1TY_W%@{CD&q<k
zw4-m~S;A>e;1MU`>Rk~0HN2acmH;sNtax63ig%T(ED|%zSVeecV;lBTnenDJt@jHr
zLPreif)VC6c^jn75PHG`tR`%ke*}O5YN)u>eP|iS1h8EB#LwUkh$$LG#%P=*4XjP)
zE0ozp#?z0=U<cVK-7VkwA`?21LxnWAOO8vVKiAEJ|1NSFP8#Jt^^z1_&i8{4oXyp>
zj1`&O%y;_M#qu1bSy%F<Rocj6vTUmFChq*XcuW^6N4{7CP2aTKu#-<#fp!poEUeXh
z$LBxKB!5&sk7s)>OhO?-L`r7E4~A!}znnWUI(PZ~4c83TjpNo)Q4h?U{pZpjm%q4e
zDm?*I`=#eJ&S%(qZIHdW*=ZgpkoXAD)6m19Wq5;+Gw|0?_=DgI@|^uuAnUVKxcXmS
z1Yt<i#dG#ve~7DETGB9G&3Jqdy!}D;PuIV}`X(b)UF^SG>7sCLZT_`+z5eG>`u^5-
z_i!!$gRDTte9)hs>Hw|5s`er2LR_)%dByZUV~KIK)m@$1v}8H<Io4=B-mLo~qh!+(
zftbcG9Xh+|<3&WV`yZ4hPK*g?C;qvnEcVY>Jv(iQ6l-O?ZQLzNbmI?nZeT4a0UTC{
zrl)}t{zHlL2fN0%84<nO72#Kh7{&6w(Qe;7tP+i0rsS}VF-xyMFR%%%K-4qZy)k^o
zfwVOjdRM3@=-e^Q!CIWPY)ZYEE!$$5`j?>GU%MTdFtcGfB@O)!@fH|S2tyZb+e5Q)
zoiuZIxWaB_ptQJnBdx*@8h>EE+8b{XwpY_Q9&-kazJI5|qD8MiU1*j&y?HI~)NC=Y
zmjW2#vmj6FY+-Ws9{vb@-f1_e-5}jGWnFQfz`>Y)47+#zz({|*_Xv0&Kr((tqId5!
z$@O|qBARy9fV2EHZbd)BGLiTT{ukcCHcoe|sLV?QgFQrs2MKa$vrLKdVlCc{$rq&D
zCLBO8eSXgLY3UKr-KHahGyYvtQKBP_Eft;VEsj|IR}wn?-@@H{Ull%)eNU@%v#H$s
zcp;{87gLM3XiH1T=g&UFP3x6(km`9S8HpRkZ+{Hx+#j^6$3i^#-%(sx{c%&K!F~Vy
z&!+0qAd@5T(A0KKxNmnbLu1KqsadT(lVP$|MyNrLqa~+m&Aym0O-tg6bpaq(MQc2a
z@;=OTC^kO7RKvsQwOwTL*_I||=6gt`kH_U?J!()^*$;qf(aO`S8CEj$IlJM0=(bEE
z+$#kspDPUHPH=b0jO;aFnRJN_qiCPbE<B*?<XhTV_+8@Y;)X+k6ttm@j?jQ}6=l<R
znReaqY`do|nyL$<k!)oK6{r_!>7gbIsXAfCZ=x<SuiEexXs%eOYrZ%ct^e|EoPE4D
zn3K}af`nSEN<8ix=mn*qA8@l2Wwk+G#X{Ti9>jQMhBnsV%HbULmL78|*KN1HVSzg~
z=>EqH3I<t8@qPW7dTkPu^GS88+qlmFGy4>H2&MyJJ4n19?2;Gi>cQxZbdGL8&YhoL
zgfF*N2go%GKR6*&6UY!cH6a?kS*JD~IN}D~PEMSDa0B}68hMAs&L^%7{<A2;b419~
z0rNZLiBu?W{U@+TriZ?k6|Tw|KdEBD9V&Z}zVsOSP&3{%tcy$PHtYK5KjIH~rlfEQ
z_t(2M$)lJxWsLOR!!mRz2chx{h{|LY-URHA#}*YzQjK-r2#`&(=b>>DZ$;`dyuVa9
zS?|z*caDJ2fdy)0>SnhE=rGXK^?n2<j5pwX<kH4x_=RGcdoph9k_Y7>)$o?AM4H*T
ze@G8nEa%v=hbKDk;%ZD>^El&Rvb9FZq&9zbF^C)_-y(1bjXF#JsyG(#AOtf{u{w*L
zH-Q@!M<AI0zWN)2P1>NmBdI}Kqtn%Kl#K^k|K+FM6E4>BH6y@>fQ`e~w@aUnJ}Tb_
zInV<Oo=~EIYgy1MAYT};g~YL)TWkxT8ci!P=t1v<<y_+MXFn6N2T@pQHr2|U+hDCI
zh9;|&%q%Qi<w)jVC;KJEzrh@c>hkMqQ~t4xS34^1W#?m&RKiqbg=w%I5<X{kpm{bP
zwA0@8RJ%`<@N3eGjGK!naVPX#<wZ(kmi2XoeXh%_FyYtBpQ!i}7N14PN=$}iUimau
zV1=pgqt~|hl^k=bPh*?PC&J4`tO|9ZKxlE~9>Q=(t@F<P!@dnaUOtZxucsbf%Tz#c
zg)~*%VfKVxU-~!y{e4ot%JjT$sqqh7)oGuNbxv__VFl+&Ya(EsYKxO4dh5>&V2`FT
z(eBT$9)9QvEmOdt4y}CDx~BPKrfQ3NqzgCPHH&c65GQyR17SEbSyp6)Tk)2?Swd3k
z<F2Vt+;Dx@rn#^OHg90eSN{LDh^Vh=K|ns>XKQ_Rag?0i4dH<=Q5pOCCTEz|L`bdc
z#@oR@;v2djMv?1=s=SBskAT`wvu8G}UQG-2eA@fZXgr8x(F|jFaw_)~fg`{$J6p>i
zuopd@Iehp;573R{-A(_5nVgkySXemm4`v;mdi|r!x?keZj~ydv!<qa<14S7TYPM~7
z#m4^$3)8Q4JYzMwm55WJv=ZP54-bHmqx=*r)^Iq0kPM-v3#l_2kgsjJh&JyOaH-wG
zb*ld@dsbP<vk?<%pyQrq_kp(-W_6c!L80T#@~7@|EoRkJ3LU<)@}(82Ddb`c*B2&D
z3(H=L>TA_obcT+V^_$_F#iJgxk?p-R<U$Z-{eF)-Fbu-hrOb{L`Wi(CvJMGh6th)&
z)iBULpxHFI3dQBCrW002OOmlOI8TxwTH?19zV-UG!7JryqNOwKe@bvJ2<n~Ia-2it
zeXL?7#dgo%<ibYE#^Q7jwMvH)tawf%+AN)2Nq1$qb11w&_@BS^uS@Bt+~5;R-}lN`
z=K5DwP8#*KG=O+$wr7Ab2)=UlNz@#}YpN1N=R4wX_dGwV1`Dg*YIPU=b1l4uM03;*
zCnFU`U0*b8O*3@dBHyi6{5J91Wcx6v|Gwe==y|O;MC@i~d8pDe$I^qv3eNaqkHm9$
zX$)>B8%~$qL($U<yKp{d>BGGc^U+_j3ow~bE${5(BV{PJ=HUT5*Ed`UGB32=YDlF2
z><x3vx~Y+9-=b8mc$qJ4J6bnc9UcgB2zn=-#Hp#mkpm~r{?GS|H~-yM^TONM`20=#
z%&#uzC=CQ?12Dhc;vhPeS+=}?V4KQ)CT}R~?%MMP;AiY6d1fY-6YcC+j}lH{J@$qF
zG=+}PUX94qxU32h<C;HJaUDqMgCxqb-I!k3Y&S%uv7GmGQ{7W&=;Ns{+dyhZX_|sw
zzqGoc)^8)1U+QFlmwME8agnf8M2!c}h&wz`dab~4`d(w%C-y7X=SKj)y^eQB(%yld
z{#{5hmOybNpOlgy*ec1vsQ@FAy-DcMD4N&6Ytxrt^1gOl^`aPg<yARBBP!dUI?pGp
zY`hFr7mvx>X7S+uj^gEgmpoleS~s{;vxqYN_=(g2_z3W>-#J%<YEd0!00qxUmcn&A
zgh|AL4Xpn;8)_%-jXDIFzpx0k`)1zlLZQ0FyR+B7F-0@Ov|EwQT8VV`^A5}8dg_GT
zwi+%TwD)Hd#Xr>>vq%b9cHnC7-ReRn`U)lJOzlXA9nwm-)O6;5zmf>!hwUMsU>_94
zGBvT}@>Oa=ZG$tlBWuNVCx-4?Jnj-kAC@-&)o^NRJ+ve7x0i*u68F7lgB^jxei%S(
zLQ5`RWf2FC^zopi{(PG+7^)aB_sq(k*Z0*3_+t`NZE-BA<>np(f=IV`zO!uic!J$K
zet~yve-2-}RGL+HHMy2U2(bEluz-I7ay20>*#E=XyklINQHTRhVuUkpl`^23*8N=Y
z#}i4a?)c2Zvs3QdR|~4sC$zoJON$Z<3qE`4;EfOF<$GI>o_7r_pHKW7p5L^TvmA1{
zJoq&x>b}wajn*yUrt@7qeOd#Gh-B?nNH$O~56@%xi>voYF4hPLV_YP-R)F}Qx(+aF
zHC;yHGt~s72J%(<+?qXZl2$@f?E^<*%laPhqEF_omUr?fme&Y>gmq`hE8Fj|lv}H2
zVrt|pj(}l2yHbNa(S(+DH6ybAj-o8<Qb<;QJR|^fFt?s$!ha?{-5JRT6Kfm6Z5c(H
zIXdfR8pFc#D4hb;X<x3@pSB7&ooTeQ8J9Fh-If0ck(kL}mRM3;WNc#=K%Lp;zML8*
z)eF_7t3*nsJL!6o7QR9g&ECZ%l*r3wWK9I<aGc~gv#>PedG50&ascL5vpJj0n92wT
zp)i<{t7udD83V0?A$u@RXVP)M?q5AQqb$8qjN;|%G6k*>w1U1!TU#_s5yhC}3>Y+d
zp5mEBR`$RZf!2daAgIlx!Z(=(j{nmLb(&ecbG6)LPsG|^J6S3z`16pTTsz_6LRgQr
z?=#idbP5_?b}iA+scfuWtn<B>%M1pd4kTJOxR$=tcfu<-OLOvCvd|H%xccl7upe`X
zm<K!bC#kb-^8Zq4Os!W<#XvcNw`DEEGQO_=8qLNbO|<&u%w6EJvlP}-LSo`VkFXgB
zrLSvX(}R<Z&m{yUbXn}ZL+a!Ae@!4;USS;HKeZ2aaB#z__5F+m#9K5VuM@Crk%CNR
z{0Il!#BPm=k@98y@TkL7T8VXBxkms}klNeU*bB+DB-^3chZjpCO-!y*At@k?HY|bv
z{7(zqm;G<dw>BGqZlkE|^^sx;_%mQ$bq@5$p|y>((Ys4ICXo1<`2HRArKcvdz`*g{
zfB$5Pw@pfK)h^QPCN>mz$!YHE?9(k9qhfAZ>jNfU`n}5(oiFdaf9JlHS|`JCHo#2k
zcEXr!#p36iWsr6)ykh4AL3echl9HScb4#Vr`?D`<P*)Y?$(He;HfNB+!B7j}nIXqf
zf;m^sn%BBL0@~wDf}8fY$>l|UTi3Xl%8uPxZd592?n7tTjk+c5Hp@uJH2apl#+u)&
zr?6lSB|S*~5o$?r$eE?@nxf-OZ}3`dRkv`KbdhLpv=W)(46uwh`Wg&63h1BpInB^8
z)vH;*D6k82<BZ${m^4;qM^K;WQ0o$hfZQt|0Uko1_&C)a0I>pvsG?rs&ilz$p?tF>
z8Ab%3=DC@GTc5w&j<3afh7w}S+-fwyv*V)*RD8^w&K#CU1@J3g9CbpH0naA3MJn<}
zP(Js-$}B4ymSsv39(j7VbRPqOFX{pS<@>TH2#wHXk+`S|(o=|Z#(OvW2ryB=aYVlr
z(=c21%+oT#>c7>2&3j4cS?CDV71A*<rvE&9f5ZO%=jE$8>aS((VVh>L+hvoiL6(0)
zTRlz-Al49)>G_MhI>TON%d<3xzzx*|+WP;R92d_v7SLkZY!mG9g;QL>UjlR7$WGsl
z3N53mggg<mAh?%&QAY3k01XQ*LBYR_N;&ZBi__f15hIioD@VS^{9jy|Mj}2|>PNtV
z^j|{??7TQ07JrLP&M&k|6DV*QHk}l!rsYn!QE0gN7c%&ZpwrEcBvOrS*C>Msxrk<)
zdi)u$;&%3i8Km@Hzj|g|QI)}Ql#bQUP{en#&f5I3(5r&-q3CuUTe8txd(?~2D9x@#
z!4-TCq6wa}=E@{;*iLgunp6Ez!OqFf$M)8#m&Yg@1?_KcnH}l)jXqgsa*gJlKg&a$
zHLdnzEVjvvQw+O_@xS7+zx!x9%Iwo(WG9~br1~Hh39FM~S>{z#H{&qTJdQiB!HPrV
zF-k=&Iy|-Fm3gjskNOd6hsDi%C^?y(C6yoKb=#T{Y0TO@`6Zog0hm~z{UZR{iL(-+
z<ehw;XPD`>hk|^u6fz06a?F|pUrY8q0{+azJpzUw0XL@OHmxPRgF3-*DJnLU|AVal
zhN$Z*LGI`1ype+eV4CNbCJuL7U%nj<Dqd+NSW!~)Tk!!@kUZl>^-@z2UHEbXAd^^9
ze7A9}kcZ(lQX%t{(H0JU&Y|)n>~)4ATs2Yyw`aAxJ#V{5&<*QHtGmrJI`@@}yPX5&
z6Vs;@SRVm>unfy%nv*N5P@8EG&cRP~(h@tuGUM0KmZ7Zm4~hgH&#*>1_H@l_wMh8>
zZ%=cfw|}jv)-1*Bw|u8v$G_W(CM37Hc0bz|`d|&4D=@px182K#7yRB%T!*~0n&OkG
z11l&B92@<<ut|e%Z@K2Yc-9%O%zrm2V`xV1sx3AFRq0+8<ldm7<Q+c=bPYLRP(9Bv
z5KCg^!IExDSZVdXt4HuMW_$p_d}cUy!d*gyKj{GviU9<>P1s%%v`XW3&uZR3^k((4
zUYg2UT||y<`Rt}v>^4y*TqLIwc7I|8An?Dmedm;M{qbzo2m5zPot{1x_#>+1y<yy8
z6%C@4LqVhd-bBMLd!h_SYSDK~$7LvhucYp0Sg@yISRLuNcC}8z<nv*yGQO?d^6oya
zu8(}AJzPT-9tTiJBo8Qv4T&aK{+OkTa>{NCOmgt|F$m1!pmpc1E=tjHWj7I%gCwLR
zR`q5dk58^<^tYOK@jVl%L(yzoZCdTPi!vg_^yOcda@zmf0GP`t9KMeIN4GDUM}!dj
z{=w&0riYPgbK3g}3PyVr|K>iQ@S4Bj^LM`X4|NKv-#wPvZE+OMDaTpqRc5#0Zq(Ud
zKUH(?lqgr?dxocI$qBL#B<dIo+WU;V>hTyqEOdAbTTs%DwY)v4pbXquaQ5cM9E}YF
zZrbH=zP+FUFR1&HDj(*U=GAm@3!`xAHsgd3d4LfeiJgQR>AbwB^_Z4cMx}aO!rB8Y
z*~QBVpWLrJ+)1U!r0WwShJU%LZOVNoQwnhJ&|0Jr;FqxE?#PQ@$cF|u3e>(!rCL2q
z$)Dp4vCn~Bn;h=y#ea7xF43}du!`4kyU(OEnD|fPXR>;gqn{R|@hG{bB`eHVS>u*l
zE(6IZwT7ZF|3+8K9l)*xa^vlB?t$QnDsK>l08@#dbSd`+r{&TWdwqXZgj!>S9aXru
z_K>m|o^59*SupQ0f$jXv1HUnyMk_s_>&lNvrsb05{tg*vmAUzsi-Af$LNeU)eCe-#
zF(@Uq{s&ackzT4RC}(rfmefzXVhO3*ACZgPPhoz}%wG@Fu6#*b8KD6z5N|tn7I2Gl
zQN?R1;moDFc%_q7vV=I=I0>%bOPfCQn>vS%y`jAYf70`gB#P#p*30)caHBaj-8Pxn
zef_>K-Oh|0|E#*7lE}NL52VlLl+|85H-W?Bk=6gp%)Gyxl2qSy{G^bT<cj~DItk;~
ztvFU>+tOP*N9CwIVfEiTqJe!_Iif04yRR2hjpf9BapWwo6%y3&4&Vj2ZgMbkK0yO;
zC8Lzakhx|40@@v$U33CLN@SVk^Pl+J5BIKNZiD#<YcU^`PVE=&+%qV3<E`oqOP9mS
z^{@Q()4vH~=&*6=7)_wgC}#I10A}>{<EtxQakx)lCe%H9e3#*7W`w_6j;?-*-&@oM
z>-dF<jf>Rup^5X_-x%=e26e4Rqq)%XK&<BetA^{4KMP~Cai6{~Y4ACi*?K$zLiq_K
zxz+ItIdK>lJBCes(L!at!~=Q;SPpv=hYcPnqIY8q1k*<n7@BPwYg++oh44kp#;4u%
zXxV@Pu;t6oDxp2!hld#l%Q7p(1Nphb7jer6l6F#!T$3HGdWVN;o1$7G_^8{R7cI-o
zmtMted#G}IyUK8IJjsmqrg*jI-XWga`RhK!cL9%p)2gKVvK>shR+R_&KcK_G;2vbW
z2UD-i2-C{OBVZxarn*rjOst__geU#yV&@x=@-DR)I2<AkS}d)TB9#+~%&whP3x(vl
zRj*f3&+}KLdh_n^9Z9nW=>Cqe8sB>R&O%EVXFJ9UPo`=^YfPTlM_Z)q26#z3A~D7g
z0%oP#|HYeWC`F$YS>GT}7=@Jq#@F!kV)}$D10;822>^YoZG8mzc$y6x2v`P8AXyCG
zrK2&^F>uSND)hvmaYpaV_g)S>Aq|`Ni^hXdnqQOm3#gm6V%>s{A8cmK7Z=XHsAu6H
z;Wa2UV!^CFZaH{6SJ_V8mQ~0w!saCxC;X?PO?e{x3x`K)F*#+J7R|NiSd*j(^<Izh
z(e4|!XN{?ne8E##dX=-RIf~-N=w+I7P+pa6v$h-@{3tL!N(<h7(GZ!Eqv<sGE_HCg
zFIu+M&nI1eZX=<I=4#f>_o)hzX{L`*Ynz_8(RFpp**Did<%nK32=KPY5zVnlyBqk?
zIEZ_u^HX#E)a)o`+35k}nCP3)3j*%6{4HiP#F>zlBgu+i2_K))iyu)Co4W#-$<?~Y
z$gSuT4mZ&LK^;mGA9yv-JA5lCBk`ZUb2m~QVY#VR2lQ1wYo4{D|Bmi55U8GC9H})l
zxtH#du`tBgU*P?C1e};fjJq(E=83)HxIw`7e{YM<O3woE<CR$P<rh=KMzAsE7vUme
zgRo3Y20wp(?CGDOR`~_ESCiy;f;d#wFN994zYp)5q-%bPK4YQU?bsM9lzi)VKcgJT
zY_5(+zwaop(?Q`WzjW+LlQV`<ix%0|$dmS2tKjTs^+2hs1k$Hw^-L~){jPoBiqJ`X
zB_j{Z+`^%?{{|Dxwd9(;guW(?Z~xa0o8`u5NA#3pA~G2xQ?N&Nlj4QL%64ZM|LAFP
zxIf=nYBP))qR_OW$9=FLNGLCSE<nsU7yFZ>6Snz#`nQT^kpbz0#h;?%#iAJpZfg0R
z02}v#n2*=Dt<7K|NfQ0})3XZFRMAQFd!kF4`2tzeip5V=n=&_<vMvAOEI)i#{9@Cn
z_ky>OH`|eLh9~N{=BxHs^nVyWmzrZ0-i>LfBMF<lD(jyc2n)+`R8FFgHQ&CG$!F0N
z&xAi;b0h~Fg$6s-bCfvJ8-Kh|4V7I~d1talZzscP9Hsxj<Z}EI=Mg~bvKIaZu|AH)
z7~Cq?v?8D39x8hmAr{-SHYXm<@4o}ZCR$d|$QkoP*}d{45ev~FcFK0zKL5h&V#z}8
z>k5x|JTaQZNbbXppX=iLqch?q!@WI=hfYl+yG!JkyI1Q=DrdZrXkOd_hl_i82?mij
zwYTv@{GcuJPVXN9i=j<-@NXwxv~RR-+M7>1D>UQEaSV{%!t=WMD>~USwdv|&@=nEO
zSo#U%EhNIysAssnKDR)8HjD(w-UPA$oAIVbiS^tlfr6?;eqqO+WU{915#S0R`Hi%s
z&k%aBeUN``)}3R#I43ksoQW6k`{Wmcmd(blSO?$8WNYX8b3Ujvc<&L=^rALvd;E!V
zBR2<dJ+Yq4^KOBI**`|>7Qam~lXmdgQ{&3hQ(k;w;|OaWm#*-v%-%*yWw`N8NJ!KM
zv<v;_c~U^HZ@a}6&oPQdn`iD2hH1rPh>;=b`6Pz`)2|w~?y%;26w8(<ozk+`7_&9}
z3;i@0Ju-+rJu^$<{v>>>^Wt#f5CF*VFs;rFmCU>UDdIhCu7GcdXaXOwAYBVhd$Ai}
z!%FO!ZA%C_-$cMsfNcG%f}Nk=Ken^s@c;s#F+Kgx8#2HOl|>Vtsj_UnmH{(j3Y4M#
z!ZFRq!%f9`B{q}antol*E*gDwY5$a$UP>VK{+eFRu#0e}tJAGs+C;d3$5Votv_wSr
zBTUdAuNWq&H78lqUDvel`HKVNslKRdXY|_6orruxc;DBm!m~ABx9d*Lg4q?n$7%iu
zt5g5O?JHAPWh1^UPlbd^5gQJWsVmjJaLVwFx?y<)5Z+WSyUwC@CQQzZU%1`X2mV;q
zcm%{e0)Dk7KfIW&^L!Xmnslo^Zm;=_>>05^{JSEUWZF#)u{T>||D}#c;lN5di#-no
z#$c9coej8E7MtY3b4BHIAT)NvRKLm{687zWrj<R+Q(%l=SG|O!Ea0-$v26iRo?uPx
z>#SWmsJM-_okQY*Lt1<1;h>+|^)r9}!VJ~-rnk&I1QUE{1qQK=9T->F{WOy$(WS*H
zp9A%GMEmgmvSa2q$&q2)P6FvfGADV=<$bV>a2|683+p+$`M)>V4map#J1uSu{Ey3i
ztUYWSihp-&GoXN^RT*#!r|CH)>05Mf%BbII9*#Og=mHT>a4y~i$lS-jP0ca#+}fqS
zdEXXyxBlsv^WOS^tyn7cuBr4r&k+BafTh_#Rq*z;mS++<2n(p#6`pVSnv;00L_;QA
zi*~a@;8%xso$OsBk=myF)J}Rs+B1$UYTIS6S}S$wVIL%^5L*pWfTsuZgguq<Fjdqc
zTAe}OJ9#Nn{Mx$i&2HzbRLAo%y{kFSO-=f3Y(Y!;2*U!Y3b|<7zgp<mizT~fG}pA1
zqzPnhwOjul0o(m@m3T1*r;UN*DJ>|<txv3!O6MSgD`8;w_a1cgYKr2UUz;rpp>+yM
z6p2ej$wXiroOc0nZ8Jr)o4jVx<38$UR?k_9*RUVv-VGwo*|6?wo2#Tm9|4KVl_&K2
z0$Fe2hWeG9?$vgqqCY_|TwMGp1W!|ckI^udbz08~AKrs41A>AC$82$O16?W^zGFT^
zJO`s>EtmB=$fv{M32E(#+tm7I#HKDigphT-FDdyje9hgeS}vxh?12h`Op%XS#l{jp
zn0eU7#g|RAqK5uH$Jko0=xUrv(d5WPASgW(!y$18-ISEgC|Ud^zEtKqJ;^ik@03~F
z8eH=<<y74#&>n$uYJnX*v$qQn%~ejAO^Idw+7#>NEX=7It7&uizyAEs_eN(X`!~&U
z72jd(o&vF96Ic6Y>+iCbVXi$m_-2)yr40dFK?cF^D7EJk<~g<|#StLddG_NA4Hy6R
z%@|g;_jA``DHZl9mFxB8>w%wad23N#po;8xKNV<%uSBndNv67I5y8=K-GNRQGuoBd
zw@2B^XneqOb=vwGwNYwFZFX*?^5%Z@IGbKI#F_6FT3I}AF}80@UX$2XC|Qsd%<NJb
z;DBf`Gs&YhOX9#ye80|kKM?>%tvfw;FQhvHfzD{tWh$AbE(C7_O<kQWD0hNjgdGK2
zp`gkYv*#}P@^<gKse~3g5{JJCL=vs7>qN1Ae91W=jwt#n!nd*hufB(RzWnC;S}zu4
zaCFhgkfNq@BCa>Ko5NxIU}|Mq<*sT><?wvV7(p1Je&Czb$ARLs=hNAhDA*)gZ1e0x
zS1m7vA@+EHT#Zw0KtuhIMuwOnF5>sM^>V2v0wpnKVRL6nFg9ycCqlp|*K*TmjKH2Z
z)6*Bn9Q$gGQ-_=)iVMfN_o0yHFDuqfonu-R3v+$e$CcVpMFXVGy@Mu_!6jn!tbHBY
zTwO8s-abIJz;CNwT~`VwAJ~)m#W5?H1<rTt4{R=#sVJF1EZ!+C70?SUgipnnX0JqK
z@($M=jYU`lFXf3JoIh#fBDl(3pJcXLK_0BO8XOAn*y*an_eAqHHGT6fc|`dM8T@)-
zCa)t%t6h7@MPyD)$&73AY1}H?YnKEYOJ-m>M=`_o3#!c%F#eX6sanPobw>&6DV|;c
z5T;=}3RmxJiucpFA6W830dT7lNd3krTL%F=oO9%D>s@ISR6y1}_9Wi0&CGT7xBMIs
z8YWnKpL^UF7bbjZ_Ov!o$5`Udr3pRwUiIrHW3$??;!@P}liYnnD~bzuk9~6(xhq7s
zgv80eS$Tprtq8F&{(VlY`0$N(xn!mfG$!M001J4)=zxfqQ9fDxnE$iEli*xG<-Sg3
znA~M1_4jsA`Z4Ve3}dq!*>K6+<5Ib%6%Zs=QTHSQH44qJ=$b!q=<CbmH5#4`_+5b3
zXg&kqSJK&aql1kOn@3|q-m<;QmR|1oaA~l5ZBxBu!@Jeoz{tz&{NKW}C8AcnZ&fsu
z;uf09wg&K{AR{cvODn7|d@2~h;EnWriiwVN|98{St%&V1*TDYj)S&!qfyz30`nD&Q
z=fdm9BLMyMqgnv|Nh|j==Y3KG_@!`K-3B^PblYOO1G2+R;_J5)Ty0PpD35S%)2*LI
z6#2HH3wud1T2fQbVfpytUo?Pa_F2Xt6B~X(_nT|w*A!V<MzW>*UA$bD%zTU<Q@@-I
zHMJ(|ievOj0x~#@3G0>u?sc5kQ&Q6tUHee#mh_^&{wYiv2Hneo8!Knv;LVYS6a~sn
zNXlAQJZH!+0oe%Sy1}Vz5$urNPI{^O=1a1Bw_%D|z)E=NU*j;UXZbmt)M$cdy*QY;
z<dr38oi}PiefkFq6o#1w?5+Fis`>7In$0__!cogsraTBXhE3#7B@!A^ZAFnSibx_p
zu*xWh@^bYtLfOw);PL`liL?^RoUVSU#s`Tq_Pjgf!7{aF7$5$+3EidrRFGQex60Px
zFPyw#l|M|N{M^&}pWp+Fe}P>9=JRQ%l=*<72$Lz#@;oz<FBs?{$IIEqE;1<E%O^kp
zIJMTd1OISb+t?zJ(iIy*5R)!q0B`*RU60hJ?vT5>QA{__jjioRDEU<L$P-fO40M0!
zN#CWxF~iz6m(OC)#^3AzxcFnEvF!(~m~FZbl)rcf?pQp_`GyJJ@!n2RDf6s0HiRUm
zyAJ!ezSqU8v&QAM;yB}~_>gj2XWRHfGKB1cSd|ZwT0VakDNM~4gVLO5%AZrK79U2L
zhQ_INEen3#IKZkTC0nM#u5!rvw{**6*V`!Erw$Kik-RJYa|C_8RI#$oxH<4^uS{Sy
zg6vpM0doXoPok)(s{w9A^6rl~;=HH-QQ$?}=!?E^K%`JH<ExsyRxh@zr8OEptTaGT
zHfE9vhzV0D6Xh6oR#A!?<R!a5gX7)(a=v<A9L=v#o-BR%wmrpfSu2%Pu1*ywG=N(h
zqw^{isjgC7a*W3RZV*7p75i<CYs=9mk%>O2<}%s;XIAYCs?(eVdkx|`!b+aAwhs;e
z8W$dvs-=3-USz)j7ra&fMN2m3tbNJ?LE}XfD0DK(;Jc523YLaI<N}EjG*fW+F9G-z
zXFi(O7k_8DBPFE<T2-py@CxC(bFIfpZ2AmOqMXr}RG>ZG0M3K$qq{@uoQZ(pWx8T|
zuaA-cW#+4q|E)bYm5MF3v!FxYPrHQ+6;Y|_W&P!?&fe~oe3qq?Rd-Id(z^H&bm_sb
zNG6OB7&&{R&Y(Kd6BnxlzLx)`E>A){&GbKB0*szr5HOF>1T%P3uNMiHnVRY~Z_*dO
zmPBq>PJhsSpzvw7s#+YM!I#+&dZFfQ0Us)K&F`MaCKMe$dt2_(-S608x%sJ~K{S(p
zmMyeNHcba5`~tU0+5~({OR5MK<0UFK4*XB99bwh>V$4am*S^G88bu_3vnRjwn~HtM
z+nLXlrxe8hQL{i0Sb~zJ!}8r;5^Xu8qpj^Ybtgsps;pu-B|W_(K+?i6#gt%*DQDX2
zOA{B`&FqjG>y0r#jO5qpXk)HQQx_l5X8}%k{Fla=8kaU<)vnn;f>ggApJOrdxqjW)
z((u8YOr<k(p!h!k_CN{0R>oJ^Oru1g`4u9*cr_8ouFg3W)-4ZpImo8x#XTGz4K`KD
z;-FX4Dm_}?{(!sx0QC?50EK)}>Jk^!7kkHY8v8%iOjkg=B>w=Q*Tz1pR$XS}elX5(
zYpLnJOj@u@C5;)GhUGN3kY^1a$T$8emc#sI6p}~P7_&^a;Y?sJgXX&^z*nqsMHv#3
zA3O?z?p_oskwxE+;&Hh7H66?-(Z?blyQPp&>mpRH@^Z$#m(h~NAE*9jj8~07RoHL9
z$5rUQgS`FCi&`@Q&MV7*lVN!@`<H{e=(9^L7jjG?b1Y5>%@L&BNp*G}Qa}{Ti#VfI
zkDS%Eg<}%7`8ux&HyFatT%?;UP2Ide2?#$~u2E+s!cqXBHF#mUmkE_s@+ta#nlS{(
z#8xK-S7p(pg6|ABYGF}@JQGA|dL6VWlwv>Loc7TBL1I5DF48j;S&~i#ROZxa4{5th
zhVJScDH0v|Ex;5mzoeZ~*;Qo6n(O;GuCHU4B<(eGZ5UMmo`ous$5M}B<&+R<DPv#|
zl1@9N(yZAeG6y3yq12;|tvDk;Ra<DJE;^|5N@<6bxgxujk+8=EQrbx*<f%CnPNizI
zTjhVAJEv&Y{>&h^E#`yUw)~Mx;tcl93vNN*)f2Y}3pNfs(p=s|E<LXzsx`azg>o~W
zp+QB-&|;hHwM&0|l1ST7%K%Oka4DKS^UG)aa7gDh6neZbdTrku!Rn%f`CmiHAY1J*
z^&g328iU;zX<;p_LO7)-ADTYlizXFz=7+XP;&NEzerUXs+g<>ren+*W?xWmn(3dg`
zW>~<+Pc%%Lo2=f^jFNgaIEvh<mT(F0np=KAt#Zgc+i?ke007A}EuH1H>pG#}af&Ua
zvYAX#Z9jD#j+GQr8Djx+;8RCxPWuaBmRej^_f2ycIX`vUvb4cf&PHm=R`!YA(56JT
z_Xlt}pz_JPVT^o<XAa9{0C9@0odvbmy@GOT)?29U9b;^5H2cJcL=UtP-7=MO%I&5>
zGMS)OK1EX9%3+WwC%&rkLS7(aAOTeu6Wc(==S;R~vGNF}m}ECm8(?I>AW&BlwaMEU
z!u`>f2HNE}vW>r*l5~Ys`#^uS6;UJaqPhqjelfqwo-L<LwZm@us8{z9&SU_AQ=5q;
zV2|dS<=9`~j9eqgBHB3muG9)ffTHt6G<LS>zauBfs6meDw#Ldu6xE2;^i<1iZU}C8
zshDO17BR*s*&+S15)X7X;QRsdHFo>xi<U>EA8ulYkPlThvKd)qL4oF<ZD#Plzb68S
zm?g{&wSmPZ`<;!Fmque-SGt8%6M<EhmZ@%Yow=<MhXI3iRF{ec#xuoqF-q3dP}<xv
zlXGs)YDn_uVB&+hl%#{(HO+A0a~@08n;S%K-yyQVLJ&_B0F0wDs2usC^yiXJE(y&|
zGNQ3zj8IZ>bjc)|qz$3K(Ms83e9^a4Z@5^%q|(z4&clP%CFt8PxTBoX$?nO>)wNI_
zxeK{a4;57Qw+7-Sjqrf-D)Q}vL<@IATZUk9S7XTI5gcqel2wIvb&7@($mGk>rp<Wv
z@cfQx_WuC)aPG(%s;Sk!hf0Y|(L{x!8E?f$r{6r%FrG38jMUJJdo(`jTz#5Fn$BbX
zCS20eY48;2`SNvAN!fuwOt?7}aL}@z!OaPI4mt5(VU$mmrUMIMvs({E`kLcNy8y9h
zTY^5TB2)q~#S(|(@v5bbPA)8X$-&w_h|@1()NK~x-NOPhapH|b$9&g_`YY8F=+}!J
zu382-BRyBLX!o#c7VB|r;na?C;<y@}EU?S)b}(mIBhO(#wtqBTyD{XNhhWGepMrxR
z)>A-q3|!+p)s4A7#Egokf<Q2OHDhfs+k$9x2grKmdCwGGzxh`U&*rOEQV0i;L<t@u
z3LlLWz!4-{4GiHk!Oa+wiBROwj{pz28XyCBNPWpyT0QGV1ah7Y2qMAA{{V_cha+<2
z5m7QVX0A3@P(jG40nJe9ca0edA>+kf#zQD0sv<rK0L>}Fjs-LlI2F=3;}rmNHU>pf
z`kP<1(X3v3i4-s#E_<kbUFthc4#DS|MfWlBT=Hw@A6WXYP}j8?B$b(B&J}awv}yG6
z&njIBlO%DgWS+kDjrPBPATkjEKNSYD6MVoeQ!GutIqtgQO1M=azpC_DaK{vCCo3jN
zP1`!=*K|nAx#FsB5XQu?z^zuFZt~j`o<|s}tzyDeW4vQEbfdvqD3Ijj92)hH(T~)N
zpHJS$VW>)8(Z~ZQs`6Ki#K)R?F(eU?YbIPvBx#nyZVu7+#+`o#qiYr2tbyZkkU^^?
zBZFTm{)&F4O`=`LrRs8*wR43e^<PhExAyne>2Yf!ylyfGHNwq_;*~R}k0P4199V|P
z>ZAx5WOGm}VCNYWp^CQQ{C=vNETPga4h=ywYzJ}Tif~<b1CffP*RNLI-LiSbES&`_
zGv2G!J(Xle$*e(hb15>hBBb>Wq6<kgx5OHUOe-a@oFV499-)z_e`iZc1-L%h6K(|%
zw%Z2(0QfYPG5ei)qo%cS9!7bt3R3Lpy68(SX66<VyBCackE+qxT`ci#PLKX8BLGOQ
zHn`4e^Ffw1x&6667wWp+pR1Mali<Uu)1<dW7WiYuIOS<(Mv3;fUJW&Q<+`rn&AU1^
z%LZ<~s#Q;MvN-%x2+kWEeNhmp&T3g25mc{z)Tj7y{{S^3C@EPVUV6IA)7CDb!%jyE
z4^&cebb#q{cdtENXX(8%M`u=?uf=obzEAbmy{P(!RFdZES0IzMbHEj=^$w+Z>I=y(
zCU$J}4??pSdSk4ZW#rdiNv4ElP27x{rI*`Ns*CbwA+~33Xxn>X8*V2Abyv1}TB{Pn
z0)o`wG04CXis@{lm})%->dUP%MS|g(t=ReTUdg6Tx`b&wO_<nciutuClv?{|8*|6q
zZof=@WiFX=ws()Z1~|#_T+L>Z?r`krX>~R5k?#=49C2iKOBUjHW`Tn4E11$~WDbqb
z6=fO?frkRPkMeaanj%YJaZ?3s<QimH$ZXLIgGv%efGhc@s{r4cjvzx~jfNy{SaVE;
z7Ce!>@C7_RTO*ot?FqBU@lj#3fIFuCgb4^ZZt2K@$;rhw+i)0aBZ1KFh#3l?1cGUI
zo>WuhUOMK6cf)>YP;6No?c3)y%Nf8V=8;b41d5pT&lLhSuonPVFL<Q{5x}OgJFqAa
za0fK+04T0=g#aE6VZT!ScV+2aJ=Q5!OSWH#2dZS7bb#7Q**EHEueARFNonrB>dAJ?
z@j!Yt@^`E~O@HbgQcKIZS+^5`o~j>MYxmmjsV&{)tmT-H3F@eUB==p7E|=T+OS6}#
z)Vc8<i;W9by3=ptxwny((X+v?ufIcoQ!TwiV;zO`fL$QK$?m=|gNEj`o|x3HG<)dp
ztt4b+An<D3`hRg+?5_@OkDiavkP8joJXb4V<GS--(cjb?Z&K-zTir|+=>`ZVy7Z#o
zE(U9jI5@gzMlwwj40l{PriLKkig52DfWug0isCWVIOEMGamFeTf>j;)s7^e0M9R75
zp^-SvR?z8)7!moUWl0pHjtxb|e>EzQM?KVw3q>{MnSqW!i8TmNoOhbg$cZd_7*#dg
z4HTF^$Hhk<D3O<jM1v!Wj(7Ous_9s$;Ly{TDVjmYdE?D1M!3yK7C`2e$ti$wOaY~K
zo)11L@u4IJ9M?n$A0y37$KZDNQ4z6^8O|yz%eX8pq`A3}Byo)7({&5jENoup>OmSP
z$t3q)4f>VqnYF2&(#lens&H}ss?&o{EYhS?mnz1sl|QQ=sW*DIsx7qEDFiLU4>jpO
zq2H-K{4r^|q`y7@B-e$_r*4nj5#$=Kvbwv|Z`N2#BO%DHwEa3N6uU4jTitJ?@7QHY
zqYi`>BZ}tYyl3=R^%*rQX!QH}F)EA#O?xD6vH(HIHO8D=UfFhTi%nV`nvyoi=emuQ
zsn3eweEFs5Zpt6jHQVW{#s?n*Um9z^+_%#iTjU&oE9_6yd9mqh&Bq@DUmR;Jsedas
zIb(|J`fA~`2CKj&x%P=rJO2Px5j)7hDiXu`r->kYFuMqEx+hO~pvo~*xNuE%?8H~d
zvO^GQ7vO2vamLobz;ZnPE3R_)L_2Ow=Z>mJ3+=E>0DMx&h^w{$z6)|IdHQz|I*R`Q
z?W|KkvB{Nj&2IkyP2`Pkf-`bOalLB)07)i}m!|&!)>dt`utdN{H)FbWrTB^sjBrH@
zbtS|%auB=YG%d%ua6>8;$93~vo0T)%!6i)4**Ln0s2%yKTF&MO%SPONRC*LrCAfWr
zfli-Lm`uu1h#g|F$vdc*#k&f#X%;=s4(b{2?q2iSXxrQSqirRY<}j-fU$awg_T~{7
z!TjQ>Z6@sumo^cntZs66{_4tO3~`c3z!U}IX2h6A*>S~I=`&qStrgx$&NKL`wr*Ep
zUuA1+rL}goUCuLsMNf5e1;P!P4OWq9HZe$MR372yIH*=2*F?V{bym4!U$qavcVW`Z
z4Ui;Ykx*LRDn<|-lTqBP(jRRH#-O)_zjm=76rS3vV#UXy*Ouj{_t(cX`+G=kj_E)@
z-C0ix?Aok4pzY#x+;hfyrQYj;Cu5p8VjE)^6<PlPO@!(K@;LJ2vqr7jJ<qa6KI+T}
zql9h61vy2G<Fx}l$q|BM>Z;#Yk?uXe+mNHpS<H(lATSgp&X9nDFyEX}i;t8<t@KL;
z-KH}l;k<EB-K>|HRLaNqQ(MDo>>f;JrJLraI0qeZLel7uhF96>;TZc*jCsu*)Be>$
z$l3hU?I3uNhGUu!RJZr=a&o+im%03zH(R0&c3Ildj4P7Eii6WI$rKs7bCdU0R?1lw
zWL~Okh|negAMH-^B^{{|H_@Kd4AW}@Bn6=!YU*smWwkzBbwXQ2-@M6QKI+ln{{Ryc
z$JH6hN|7b;q*Cd#+_a@6O{+_&S|q0~Dr-e?a>0iJrM)@tiZ@cYi#b~cjU!8O8k|$D
zPwm5GZqMB}N|re;SsaWCKH?={cH@eYZs?sNj{8z&xc0rQ=BAF~1wpucs%b5wk1{v}
z*Fhbu$0r<9+$nHN;H|h>E}sV>DoZ;P7s?Vq^F`k@P^J?H=n96x?nG=z+NZ45o4Dhk
zTcRyO@7s@I8;GDZ*(JKU4x?&=MBKzgg`1qy;fvZ#p#wDQY+_B^54gM+4%UK9IO3mc
zY;5h1&vtvDVz%3UCc(+<)y=|Y3=gVVIPj3J;<+lWPyMuxc)+ZKT$a2b)}C<AtrNEu
zNh}e;DQtPIx^|;>RXFlO6<M>2l~Op*#8c;zFr#P!qKD-9J}Tzj1~kbfkz<c`IjZNl
znm|r41zAiZlYz|+VdsItqX!k(uEKP)EzDq{z!XjBz*RldbTruH4ym{Mjqv9SRLSni
z!DVqwuGSgji~-3$E42BB78W2m@l$Fx<%pBI3eM9Pp8K(_Fxx0{RVLHmwVv7*Px26G
zIU$u)8)hJR#ZM##PD79?RF~L~ZZayBCcKu{91)vDxP%LtF~>BpX)wplyK(oLb3*qR
z!(bXZ2wEtB4b=%`l2n)#9t30Fen+pWceifP6(pAim8D2nup6^ca86YAb6;a;%7CY}
z$s~78_8vIF#ThD-CNurf7V#P5G58cBtTs`wJF}Y9`V-Yx8Xe-Kj#}N02+vhTeP-~#
zBNW{~Pr1IjTYFfU<a2-ks&eB}O)|($#o9iN)32@dD@D4ta6=5>8YVZ~apYDz(O#d`
z{X1f&_i;xV+I&{J*@TI`Ys}|@gxW`M4EM&_F5|nZo;Txz%|y6fTBI1p>{UgNg2LKB
z>ND7)Vs`BHRY^BFL5eaN%K)eSsuGI`Tnv###8H=;u85U8PnswpMFbqt0Xae>;-tqt
zQzCG~zG=i1Y?H+@-~jgucYIC=qx79YlMzaI`=QR(Y~q^8gCrap3xYmP7rwa2=Ci)B
z^*yeIVDrf1em*Oj68lz~%FaTI?l2q*@Sk4lx|X4I?Q-q6Kf^s%jXs^{m*A%^MTuJi
z^@pkT4^V0iJG=X8eku<YOlM;eGAYm9I?i0HDyZA!)DkqCi0yOLdOTR;jyqzGMoGE5
zXF<ABTX6t&O+|Kc+<>Q(Qbx*+{{XiY(oOC;jYbFET2@}@*jTqA%AU!m-o@{$Zs+QR
z(qutx?SsyFt0@^4JSRL=yfhSIbsJV!Vi$mD5;k$>y$42y!s__jUXY6?0gsBT^|z&D
z(r5P|KJ+chu&!*iF^)}=;ORe4lftsW-{0WM#cy!`0OgHa<IY8UC+JV=RN8&|X*#5&
z+mp3fk5!TMZM0HY%(8$E)4>&s)l1pxl89KL4Uk1+mQ|i|#@9xix)-Lw4C(fL`)P3n
z%qu;-f#YG2L84OtXBidqFX%_=e75pOqiYhkw%jANeI;u;O$y5khDI3#S2r#<2TYKX
zlCXmN0VAr!dg9|Cg{`Eg{AcR0KC>S(d{%sqg4ixh2C)&I*_S?Q+e>s^B_IpP6b;c|
zZG|7{Xx3m+&sE6v3v`jw(j`r@^FdE;Rd-FvN9tj{^6Cnq911T@wh-H~Vf}GKT?wT>
z<>5zFm)qNqX`^=O*-0cyF9MUxebF+))>ko0yNr%SOR3rbs^j@%ietp`SY}rFFG7mI
z;|=L@i{zA}X5!SvduEJqPB=6asL;qm59+IZ9eh~~ME?L9)7(O{$DDKGySTD@kWx4q
zA+l`%LACm)k&NP+v6zkz2C{y<^=-b9Y}WSjqg^n-P<p0vq!?p!me`Vd-_`coC8Nb0
zf$oUFP<pSQwXH_~09Mr+d(zCTdOj-BdG4=n<hi<Ll16UNidm8th$he<?xrnNu*%Wf
zhooZ31st!EsH#H|*}=tEz>?k0#kkv6*BeoGF#iC06&?WlLEHJQT0K<LY6|4dKBei{
z@<Kl3w&RFAcFiQBC2neqUbC7R$}*qswRZAbtm%Q1o_{s!7k-h_b>Bv<)wCd7D)M~S
zcP@Ww5qS>@JTS?YOAlTTp7gyc`u^!)wuzXZ#}(;*n*Alcmu1EC>=_3Fw*LU9+8&y=
zrYNIPbrHj7y7YTnV6lZ^gIxUj*<C=t7LhJwWo~r%bZZN1Yg8<N0LG8Abx^#9QaY^-
z<+L(D%IY{B*OlJ-q75U}AN1RY0yjJp;<-~&o3+`#Ek#r4-e70r)lV4D8K7+>TZCC7
zU{s8ZQ3<>n&*ZqUfk7B2nk@#=RgjVxb4SL-lqoz@rXwULC-X`-t7f?uZ~p-9qzA3X
zj}+)uaQ7Gz2lq}!aKkj4+&9Eh<d@IY5EG5XzFkvptr?U!7_Nd)4>V%Qxp3dcrvT)u
zMJvO1QW8MJ9MN}n;I8DL7bS|H6e@z9jt%0KiiQ}Z%CE=GV?R(mV`-vWJTXWb<cwf=
zri2{bL=uvUwm0=B*4DnAuy~=7Ug?Gu1J!)d>TOQ%Sh{<QnHk-Vc{E?BwMq3&N=fb|
zW-NHgs+Ur7xzE*i^qM%~R{J?RjZ4VsbZ73MliVW$qlyfi8oar+%8jQj^-)7>mLqRI
zYhJ@Gfm%Vi@%2}Bu^@{LQ2PrI0Y`plm_PLJ-*i$TbLp*S>q69Gy0w{?Yh-|XudM$7
zOFeUE>U}ae+C>*CpfD%8`07ZNh+$UVnDs6H0H(EAuC1hHP{VKsu~@ZQWszF!(}OnG
zPowUV02!_aX{>+gkF9JyRiwutm3{EPCC{4K1miWtlv;FjRFe=7x?FKcI2_kzl?)*q
z{wg<LuBvS2qilmwkq}8A86DFGNB2Y##zjQ`0D)DsI%f`PvqZBYQ{5oXVx&>W=89x!
z&0R$Hw*_U+)6uBc<iwN5iqT$M$8#B09QZY#o))*5qh}r}NmoNf{fZO}{nC&L9AcWJ
zgZVVFmM??H=9LXX6<yc|ii-B`+R6r=;!u&O&nLQzQ@^&;?V4%i2#Px<ymQu`r<Yl}
zXd#fTx}GuSwdr)y&pKTc=E||hlDG9^)z_M>*|M~emed~yx*OBW>3dxq{{H}T+_38&
zYM)tHx1?$^%rTk<;a)9>IU>C_Ei`dQY<M`?GCYpiHmfqJZ(tD^{vxST#_LdbXn@L$
zoYjTgvXG_Y+B{RV=<Z{8cuxd#S62mAQ|UT|-jjT@MI$4|+~C*LU!(rNgIm+$wYrcA
z)MF&~UpDJ{c#TqA49Ur>A4gu?X}2-UZsj(d*Ed(ASrx9$cr&;<eIIHohFW7BRtNOc
z){yHK%WZKW6R5{0y5E8r3gAgNx_zA_(^ia^>Ve0lFAskT`43tVJ;5YkgI`?!s<DgG
zw`Um}E9AXtQOigZ2Q}FA#qnkxUtm$o?}2Z$D)?%6i`&4Hl{`?ep%NnG;F`FaAp(If
zIUHA1!UHnI@+d-A0-=#lSvVBdd`QmYjNnwyBKu2($e|<@08&qP8O>e#YWw!ujC_&E
z9943tR^E8cPoz43)go10#IUSdxT#{2X00=8B4@Il@>wjy_^{18PunrT0|UB|=86p}
z+6TZcv@=G~Cit8m>b_4pa%lJ1({V<YIg%@1YBB*d9GQwmETDr_Ha8a+H#;1a!L1q?
z(FeQ89!+GPH;=PQXr3$NKotlF>WNE;f*E8ZKXqKN+fO&Rj0gMF*Vd~wvqcMFk=|;Z
zs>0F=Tj=Ls@vtNDMA_S;f<eJGO`>R%TSv^rxvODtwrmnY26z;?61}nHWWJd-#HnP2
z$TteDTew=}Fe7VMYtMCYY*yYq+4EJFRuV&No0cW$W5>m|TXCvpzRh7|-mDl^e5F?P
znLyn`XfeqWF!u=r@G3jmUOUX~{{VVMp6Wn`HO+-xOCmt>^Th{ebv48e$T_2}$+)*6
zhX=(+XA;9HLBg7rlBR%_T^ZYZO-=Q0dQ6D(&MMno{lT<Vv}WJtuXcQL6kyaZ2fe^r
zcb?jH&|7b?EfzO6iY_1Jj%s<Nv5oNV0sPdhY_P^j=v58&nA(KmP3AQR{%K;A<!#a@
z471pm0EkHEtW>s<J*;FiXYy!!ODQeTEK=?wiD)K~aOVPpR_epTKzDkD)45Az&OFns
z?wmMVl;)+9RkOem<Q~mV%?m^B6~`H<+NwyLS47gu5RWU4YM$p%jw^*t;F5aHLv5%n
z)$2g2Rac6Q^kq7l-4-sYbBc9|C-Y&8QDxkh5yxrn7{KGaQ1IV=<~}MdRz{Z9U;}9C
zpJQq5+Te0Pp%ohBu4~I16M=QK^n0j%MZ5OoLD};|%$EY-up|uCgprA@fb8O$wn;KN
zq|CpkE$$<`c_((pc&jZ&@_4S%VFan}r!o6SXyT+6%^=T;zIhXt=7D>;5s~gA8<+(>
z(+#kV2#u?!4FV7jD!Re$-r=3H5FRNYty<_V*O1FtNR+(5u>90lnvTgmm0vz6tBb-{
z+X?bMaY4gop^Hdz$L^k$+<=|-PZ^2R<Uv1G9l=$`D}{q0GVbI1pl;`Ha7KgACX8a6
z;18Bt!#ok&Gcsj>%|&CVCG2dfFjww?wvm=xOnw-vD=T|(8_O4(Sw%!GCb=u`q1z_v
zq}0Oao+|q8M3!G|4<owE>xntJ!sh_i%{8~lMnsoHGlIc(^NM;qD(wr+bRt5a5y0+<
znUq72xHYO$@I()18^<ws<QgLGMA$+QDj4K)r;4e+o<wqTc&3aT<VMmodKwianTIs(
zL;(A|=Bso;CD2z%PaZ1HUzDL3ayzPL9k~MC@&|J^Qxy%m?NVFKL#<2}O_83vr)g7*
zr6Ianxkad0si(k`OE8oqK$Y@n`2CiP<bGe8<tm42uYZb?OJrpanyOPwvYJ4b7slTU
z0&i+{z1`!u_mn89ltXNH5Dgg;L2#kL13oA@MJ|a?v*)>R69=5vTY{r;G|6{6fRx~U
zv9Y-|_C|c9o;l-K*kEonUY5wJ%n!<{z})A7MrfAzSJ&%nX&XDa#sKPu<sMjS!$rKh
zrQ+LL#$HA^01EeS(tp#deHz&>@1qZL!ypmWOZs2v3vC_+zPyY*%M9{5uSi3*V;HVB
zvs=0Cfxl0u{!#(ewD@l=nlgi~E6?w&=hAOB;FyW8PHw8VSJPs>w`FA+#|F7pbQcCK
z+9@Meh~%E@j&@LYwzKIsC0G*~H86j1L#fC#tg#YCHk{^&jlnqvg^U*Y9;#T^>%{=F
z-qt}N!yahAbfE*qQpG4n;-`xJWS82(%}WIqp}t#}#S;^U8(x5bm0b5kuOA?Jq9{Q4
zlg0q3?hf_H7^RMNBL}*9Lyy6z60&H0z(zBk>&^cFP`xcAk;!#zq@LIc^^a&`hML`H
z4I3aJj8&L&%M7<fS;1sf&zNnk?iy2R0+8h3iiYWBfy^=tGMpUOs(RbffAssqs@Q_N
zc_Br4hN8C<+`Yr&lgK^Sqtogmnl_F$Y%d(CgW$+Q`Ke=8i7;?;-`#X6kbDtP+%(Fh
zf-6mwQNrQT$XuwaEk5a{nIdP#c+F5G*kiopVxW7O5?#o|0ZAs@YAm}GjAoqGqknbj
z+r~wn+F>B?<JhZ9S#AxGw!QxVCee%@O?l>@>a9Z2@Ut;bs?zBGqlWTOvak$$t_Hsz
zqDgM$FDJD0eA*q&LDl$Xf;b_8cg6yi@l^L7nzXvT7fyRT)yg1z9ONFTn=5?ZMH`W@
zk(%SjETJwL>2qMgJasi`2l`|5_TN!uy+tvsU&N0ly+6^q{MuE*=~{szG4V!fJ3xjQ
zbm&$90__>a3+inxFJj#g+bZ%1tuG^_c+E?WOtNWF=+R3Ykpc*1VZjxS^~_4K6OJmI
z(Y~mf!eOQAxd0q1nz{9if_oDp5LAv03BQe*lv<TD%`Vu0-&JSmyM;@Y<2>_J*PGNi
zAmW$Pmjx&IoE+C9TQ$R84xXAkG|yMmZ@`Itl~6IwRDSfkZIJS66LMbNLH<~vHnYqd
zb~&J;^SeyfhYGGsYlvl-sYc*aE(j6qV}p)py8)CKAkyl#injjdM(PrjqmEG`nonyp
z+J2(iee*B}X{}wit0Pqbqb9KWt78lYR2}`~rpya}<|`g8R#-(wERRVHv*g`tex_ON
zB#s0rGbsv9c-+2@)Gqb6)U_+-lorSVx~nz3)eGi8a4Vy@)LKRiNK@5pj;A{QNV)n>
zIP-t|7XAw>c$NfFyY7IyhS?+dj!i|X-bZAt_bu}An#1e9rIu!B;Bvp3(WTMm)Vhyf
zWg4w!Ei7uw4^Yhn0}CK0>or8T4<mu%sU*6VS^ogY<;km^Iv1JHj}`2+8Yt=G%eHxc
zQT1H9%8Th2np6r~+-`AR)B0PgM?IrhHZliQ<Tk~jgC;)fOXxjHTU~M|94n9wXxCx)
zFtm&sY3!Yw?ORSdcCGu1P0^M)Y;<c=p<bKYa9@XB>6*==-)X{BKRjnOL8irb3zmt<
zVc7exGMhV9eHx8Ma}-UGPZjdt>fWFv^-Gw^Rz~i<yVahx{{W*zR{93UF_0_BZS+_5
zx2jtCAu-zT$s~7Lbc<}a%c6}(G1|K|Kcn8Qxt27tzYVjcb6(91Xz_+p7#?$2*gYv>
z29%ceajHCHh8>euzL(Twv~Sqn<YR?nS#!8bZN(SjQc+2={Kd5A7^xy`cT=6NG6*BZ
z4<e9rj;fSc5+q%irBla0?Lm9Qb}DEk$@z6g!9i%-Ta#SPgPMVe2O#E<JCn^DB?T)F
z<Oel1yr*}|suDbtlSFP9$m0~qRvMBICyEyGBmCnZ-6g5X`5G&D109Nj^m*6n2dm-H
zty&9q-E>Ajs__j<>TODSE^eeO=O>!>pVco$e*Ifhvs}ssS!>8z%x`GjcDnFEuJ5M9
z7TaB%?PTWNK8%z$>owc7#40KO0GjD-VrMNDKp%B)du0~pQg;6URRcl<dsTiTvt3Fs
zFgR>UPj?t6fuA&j)uWWFFUZx^)bPE8aqZYta7UT|dxlhyoc(5q3Y)k}O|%XvyLK=c
z1ou}Lwt%O)Pc>Wna_nTzF-Xuz)?)4SMKbm>Bj1`s7x{N701E4k;dvP!RIDY(>DR08
zy)&#wJTkD4aNIM+eUs=9R#|$VNRDVFQzp^4X1*4-R+3+BWBE5gSGoR-X}+P*qFqFn
zSPwf7)pB*%pqBptChaVw{wLGL2ufxM=77*Ocy&8Pkyvj7<iDD+JmiYy_H9<X8_!~t
z;~duxJFcJ-GBMRPfE?7MQwYa&7*iS-=yOrY-{z$`9aJyFW|fSE2P5-C-&-v4_44p)
zUV9YabN$pzlxO6R+HI^k`>M@vQn%A=-sa*!6-SfZY4tl_eg-N&U=lwy^PlP-v#U+5
z&1H8R#{wJ$@M}h&Pw!1d36m<v8)JQ4>I=_M&dUsiEtK#9?x{tyM;ekwXuCK>Q7nTP
zBly&fx<X2?$IW`JJ{aPT*rSh=B(ghZdr;nH+*dg2t7TSlT)E)&S31&|7zH`aQ@T7)
zmgjKDtGtx5SlK2i8RH&Jb?Zit{{U+t>jY+_wt?A<uD}7%u1Qr10w7y8D1=Pz>OJkm
zEm>IG`;ZycO5%oq8@4co2uD2C{+}rpVitrAoss5+CKLstYS-Fcs+TsUfJ2tWeI@#L
z>ib=PPK<k?PNNJnUpZV_$X(1zLa^tGH_-R@dM>K16r^*=&2qKcFL7><X5Bu0ygrYh
zKUUxW0NpE~Jgt1WH_QZh^1Hb;?|!=VCBCERhyMT~`^7mKuK~Jd5e5yFC!A9HY+lBh
zHBXYeP*ArU6BzJmCz;(<2qcd*x3VnP5v)BZ?xnU6uyt}l#}(3}8Cf&o#AoCQqc74p
zqJ-}Fk<AG*{{Y<kYmVsKq)8)GBb5{wQD5&yT&T_}=hGG?@6u9!0OqszQTs1enr5RU
zQH4Xa)(vE3EKNk!rfM@yp32a}e`gXi@`H?1BDPrHX>vgOG&kwf8=F$n@^R*^FD=;I
zF^psbUn|tb;ms^<p7TvFa~fzXO|eqqKsci{2%vaOA+Xh5Ykn=p*K83*M<f?<u?Oe&
zD;7m9$(m!r<P%QPWR7yibFltu=)$oeF}6(sZZ6{oE0rBlmY(ws?1vxKRhCX%vdE@c
zSqIn)HB)Q0cQa~nO&k0H<X1kYDqR>vYyj-louug@8-`Ejs#~$W!zDp*1u&`}4l2J?
zh^Ya3t64;C9`S`BR$6^xKi|Exw&opTtHz}#!0?UoUFqt`gCd=OX(Ok&tCK<7>Y_c0
z=z08A(Y#Y^Mlq4yJGi@Lbt?wbV*dai%u}us1(Y)!VurE2P{m6Eb3*EgE8i>bB037B
z`3}~kcDJYtXiKD^0phzr)a1K|DyRFWT<K8W#4>UZbVZelSi|kYMM{R+@;>EM197gz
zH80r8r*$0c(#N$^XgyU-+E_Z!zGd9qRohNc04N3p9Eu9IfpiOL70fJ2ITb0qB2A2^
zCWjiFjWIzBZTrPb`-^gNN6MAKuF!3_Y%$i^qstM;d7)ZMc^ITltbV9Vs3pB`Z5YQN
z*r@HagA4xvieg*uH565H`3BWtNvAKn?gR>d0*GV?IW(T^wyB9*Ka)aU-zV+bMFivO
zrOEIOw)-bo7UhcP6jU1^VFUefRQfH<lIB($MtH7!rkhg;bH_%a#_gxzvV8Da-`lB#
z?c+5ZDk8Q3_erQ+dr*9h4Gqw^h{lIJcSSbQQilajwSNqA1>Ob@D&I_%TF5_O4(fvQ
zOpbZ;E^-cP)-uuA7!Yk-V>PB~4l0=&rAS7S-tF5CI5;AsmSlqnm^3bxWR}ypW-qwp
z(RT*bADUN5)I-ORg==FD;s)$cGr=^+DU|^CPZ4{p3`Am`b97*j+c?ccntTeKbVlMq
z2~z$jTRT8<@-WYeD^Hd;$)6)VQ8%juX}Jidw;l-FT@`n+?Hgkmqx7Iw(+Bx|P!_W+
zw>a%bbbbBQaViUR$@4{~_#~ec1vE)-GfqLts-063!tSG*zPx3U+_4xq>Zor|w*f7{
ztr%UhmuVF?vbrJPPDwr0BxAaPOG?LS_?qJjs3J0RTiQU6VH?41iGtsX8cR@EnFsvR
zT0qduWoIP$6l9kWzYIodTZ{OP(Kn~jB6GB03hT)jUXl-V+dHUXliLTAQeC3^p~X{w
zJ7jV4R9wV@V$G6G8KtB!HgSbD4a+M`69bweNbv{@%VMsRZZrx(uOuzPVxwk|VON~h
zY!>$`H@sNwAL*l$P>(xy9m1-XI{mzj7kmYNz&h@f=kZLI0U8C2kyZE7>NCi{x5~yl
zs|`9skdx$`P?Th&ZG|;wfW@^yJ<`eqjxaG!Xz&g^QT~|HFSV^fZ7rc^+;CV91$~B6
zNcmDg`c9R3s$V6gk~EIz1cB9k8Tw=TeQD|YM7g<)Np!;`k=0m!oB9sR({@QLv8&xM
z$znRKwg;Z8i>uaddtz_Fr~XnHg98JBT*P!Moc{pDFjp~~(jU!s2?mi3NWIje6<WTT
z74!@pM(+l&H`bRLRo2-7F%w>-%g%FEn&z7hp=`)hZpQ|oQ==IM!k*%gL8li4a4Nd$
zD~%%T!zN5b_eP)+N4_(WKz<n9u#<2+)b`|W-_r^l4b7f-sRM;M#VE|Uwz(L=8L3uA
z5BREhrs(xYL2?NE(H4sh!*=2R^i90NLKi$$a!G~xd8wl$5Xe8efGk-Guq6Cx&9MBJ
zu~ESwaq>KURG$><kZ92g>*yABWsyM)FhQ>;_1~k#w9k8Pq{Mk;9xK_L2ImwF&8&AZ
zyi%Yj+z>@shcvLubVbRIIU`K@e(jPsXrsyJx)yLY0lNg(qyD6NE=@jWzp#rj9#{d@
zdDM^=Ic_o7uSbJCb4Jm|%ZcN3!3DnKj_MoBauAGjR(4Raxda|6ZB{*p7#TIAZv`mJ
zdt@9DPq&WYXJ243a(KlUwzA7`vBsr~8~`iZe@TB$5GrbVvS-RE6_-9(<BiDCh7y#%
zCTr5rYH`{`_wlkR9z|->$|+P!6XAgBy&GNh^|jTW*rHV0IKZzwx`J&sc1#_h*NoQV
zl;q`)kHPG;exZ^PQJXJ4IqG?A99rCxRB_F2EOlEOn8b@R^W!z;Odz}f^x~2zFQbzY
z%E5eK)?8Yr6-?KYNrzMNi>7V;U;7J)_r&V5j)<z*HL3pqWVVydxGw^z7S{SgM3SC*
z=8THPBihopBf76Xrcz(JzoKLG{5byre5|mRF>!3$Msr!~a&7M!bBr3cxzz2gquK7N
z@%LHNI?r(IhdJu7;*6c&M3&gPM$1Ii(o2Pb+deBzEs%8&7n;C&V$wr=vOhraUY%>S
zL_ezEjm8V&$J98%rvbWrMl{2J%_g%biqMmjQ@&0WDQ;j7YRFe}5k>r;CN(7GY{gy6
z4a6){p+{(*mzz<5OB~o*qq~AzOZg*kLX*{P{{T*}c9U$)`4qZbc-CFAS7pASrH(tX
zvNZH0!2G2j@kiQv9t9xGz}DeDN)9WXaf<4!7}K>AiuD;<{l}F4q<)-QYMM*_&}_It
zl_Sl3zv?X}?@Ygz*6A4?4mlmy-Ir;RUSayB`hREY8*_6CDqNt?1G?#HH6GL2`Y>uS
zJe>JHRkew|&BabDN2Ovi32tjmb?K;d?J`*BjEUnXvE60u<c-^C;gt1V%|3W!=;vj{
zD0XO|pWR)O;41Y;Y1c~)z?w0Qrm3N5_bLKOK5FYrvXV)7A0IWNB;y!1`VtXxl1TTD
zM%)Yi5+^79JkXk)5?fxDh<r(^e?)3x&K0+R<NpBRT4|?CyOdbhaPn)&Pxo+lVZ!9h
z>$+vOs{xAgFyQ_vbX_LbOtx4pV+g;2R+^^B^%aa`2eDILL+!MCJdDxY_R%R_j<iBJ
z6#Yv{OPD>##v~l%Q5FaajQ3X1pbL)`Q!KMHqu)GIe$wdeVZbJ?+vG3~b3l5FOa00j
zVED?sf+%lK>R^xBU2!t-Gv=0q`7C9NY5Nq3kWY#?+6~=9BwzTcK(UdM8=gf>a77FV
z2NdwccT=E}?kWZjD}<Tg9%-2@MRwnv5EI9^9D_tbA>D>NP{>K>(y-b=%`l~eyBZ=G
zZU>B4mVTmoFc)o3#V|W{73r4p2Ot`mqKZ^#oCa3zuECmErMe|qF{!F&%37tgt2reO
z)ja+xwrN{`w!8z~dvEHGr6#trmgh}{S~um!IThm4+qpR-487N;)8~$8+Bo@fI67oQ
z0Sa~~d41Gn@}%%6mSx7o87I1bva=_$9}GO2&{IR@iH)cr<YTI?yNX1QdX5cQMoXcK
zcPqwfDbdmbW&S=YOQ52UU@<obbvn@`xhoo>W1Im+!lG$CxYV-~fr|B?(+@ze!v6g@
zJcS~$YBdqV8)f*hJdThj>2K-bd6QGrV&HPBMSFZU(8mCVGywp{0I1=CAc<p$0H9z2
zMMQRik<EF$`K6Xt&dw}r8l+SyxX~}g%mkQ;`7Lj>eMSvJ*=3B3k;wY2+rpC}R{-X!
z;F{+_zY?ZRyB<f?RT)szjh=|ZCnKt8IH)w+=yeNanidBm#S+6D)!me#7?LTZW}V`i
zc*OvSbBt6+>ZWdLJYe%m(8!N&In5^rkx~)Q;*$Y+0)+;|1UFsQC-p1Q&}-H&aRh3&
zNHPc=*4%ytE(R*hSspnig`8BCGv%4=uI%m_(pcNxUNC9D<hO~VehYZ7Z2d*_n%}_p
z)<_oNMotHH;}?x0J4JOFSrFrqUY|*-S)=(m8L>QU&T6|>ar=kjtE2{E(#F|UI2CWN
z+rj{LZOni1RPYFu90nq`GJ~$5Z)F)}<n}5k_uD4<IX)@CyP!?LbHSx`WK7E8)bWZS
zB(-Jz%F8Lj5#qFZJn1c~LyVsX?z4gxF)Wc`fE`t~h6<4*9zKOct%VoqDlA1^0*qp+
zCAI?Z+=+=bWjn0SsJv4pWPoI^BhTFykU(_?xG*#x>l7oAnx(k55ZaQbAoeRmbBRy5
z20q19>RA1&Y#io>)Vd38_E1M^Gwwoh_f5$RRz%~$r^P&_wh%HtywXi`y_FqD$LgwJ
zB3pRLGP80zr%oH%Spn<C4<N*)vUZ;obTO-1v3B5iq-ZFokTkFIAMaCJLREnc=7+qU
zOrkir0QW^jBwJNWaw{%dt_!i%p0(%}k#yGsym3~rT;0S9%>g|B04mD<p6U)|TYkZ<
z{;azqP`JtNzF+G2o;_5zF3)%A`Bbo^iYwb^xNz#Eek$8XwP>QqQ~9B-=FFQ&tDAXC
zMY1&i0C8N*vwL=%Ro@`7>IE(gY`Lq;Ib@a$B!Bp-UytrE$OC>wTU*@wQQ&{#vSdoQ
zF2BH&7(}8sgaUgwqC^oa1jd}>ij09Pi5OFVda=l31QYdC^2Yv=U!>m4adkbdz#|6$
zebkGhSuN{bZTX)RY!>0?_mx0nMO>hUJiZ6!o>_LkhXJk3sFed<gbFr_wR>YNuF!Fc
zu2xUn2L`UK-B~~!(zf3$5|fMIqBdd~)TznOG<49Gj8vC##U0v~CnVI9N|8d+5x^B2
zvQAQT!K@r36n=#eX1111sMsQ^MevbXqfC#*R$E+M&8ZcRCRNX7io+>cV*Hx(5^T!g
z{%Qxe5}%V`Rc^OE&Ays_f=qyTq5Um=d3y<4jIQ4EMPZYM`z!v7Ii?o>0OM2ye^o42
z_Oiwt;4$WsacsOZjQ(m8@VXU>P=Bhgn&3P#OM&q*!^!+m_IC5cNs);rh_ryf42ARk
z)n==yP9)g?+}?3UF>TTrbx12yf#!#9{%R`-$L-n~6aCFDnQ&Qga1QUy9N`%<Dfc)k
zVl~YIi(wpJ;}{eso~3TwIOOzbsqRE%Av{ql`*`5}$NH%hn=Go2Ah)*dG7!L#Kxu>9
zxoFieD*E1Nt|BE!{-{X(m7RtLYQ;VpDp$}Q_*EHE%|ir>GVCXsZn1C>ltG%V(xjG2
z%O8jzb#!3%+5}hGd~MPwem$D1zq4sidqquus5BDC5~+R198q@*Bftn4;*}{vq{-y1
zTUmkyx09~v8WN5ke~}eDCRj4W=A&%hNsQomr&4}H+a-x%kqd!<Dz@fCl17aflU7X%
zLl(t6P|#TfkjksU`=c6Mv2UIMt!>N-Pv3Rk`B{~A<YtSxAPiJO+D3JCT!H4Bca}6&
zkJTg}wO3JsJE|R2o2yWuVzig2vRiFYk~^vi<75n^W9qKcw7Dps(M1$5ZSfD9CN|on
zw=^ZtkX(>Y6$(z<%IB)+xVdsf8MU7*I1~idt|i>8Xo#*E=MaVf8YW2nvClOVaAbUr
z(k)U)1&7E{w~VPLwgzx29TCsC#EvP~WkmV%YKdH@$o(oQE>dQl<Uz>lth80Qk}~oy
zG|Pjrjt6vRnK4MoBLc3;CgIUtb7D#yOV$UPl*0hS05?-j<PPOS4c}Ca5q1YWR(^k?
z)U4*1tbr6{dE$tgSR+74{{Y2L5VwvmkO2Lfj#Z9z{{R?LDL4(DE$Qz}>YCoJmM}!v
zByMbQE9sBZ??hPoXHJX=)vgH3F&yTubWcoa`V5Em@Gj>609Gp9vG-p{)N0;EwI4gh
zr%`su<QVRUbCXj(a%vavK5GwVv?1!d99JHA&2c#x6$o(PQ_y*+uty}*mo9KRsDeD?
zo;t4E;0kaU;8VaLQJ@tX#*YR346dVj92)b>$gZ?YqYQwUN$$N}j^1-w-&5K_e`?C$
z1_OgZv7=^G6l4z@WD(+%cgF;CSflE@O&47{T=IuTY-c@I%^4?!C4s}E41y?14J7<A
z<0SYVbBdZ4U`BhUXDR_a(t<+_o~Q(|hET&}o@mHlV8(I{Qwe#<JkbzN#NeDzA#jWU
z$fDqNV}n)ENDzW(q$Q5^;)tPdw4OSoWd(6b6pMqpo-t4;68_pNt7eK=)qxlwit%66
zFGoF$vR&DxVeEi9uX2zAIPpMU+C_I0!72t;7{RM>XO<amMR{>YBx#>6$1_7Z`~by1
z>7~57SmKX!HvkIQ{-k;u_RCNc>G8A2BYzR%wqK>+r>km-sOo4MQ_89FUAL}>x47nh
zmP&g;{{TyWPg2+>rK-jm&nl#KUcYY$5voQ4s2l-F99U4HKvB@A7b)>t;pWdQuG!PY
ziDOiUIl$Rj?@?%LcN0j8H)6CQK^Z2MKqm*PqKlKG#x*S__R?ZvMhEJW*6UDDi@@0*
ziuN0=J_$}<Fn`*IO)AtjeWm{ZimN7{7Ej!Zp{bldY38vup*RI0Kk-%C{93q#u-u@1
zSFZ~zbRP)R0r;v<QQE}Tu0jv$mKsbj<wR*}vdhWy{q!2F^Ze$14QYKn9I-^JDa%p!
z@FLut0o^tg+b9^O)l<bu6AOFovt6J|{zL~Dt;};r><_$nuQbwSDIf&a_e6D;-wbj^
zdHik1B^_BSjg!rKVrALMCa1I1$&{;hIH)JK{{SFugo+YKSX=@;8tSOU$90UO2`Up_
zwppb!GK`+69RlFlYUp!-PAcnEi2mDq9~G6jo;a>i%Z!6u$hku`h||dpr?~0u?%S(_
z!4*5GE8SCiO6X6eM8N0cR}a=}(x+(PHBNzMQbsDe=|IkE)=`m~&THC?Hf|P9%_Qd-
zD{Lw*Tq7Op{{T-zdut@npb}${JlBP4dMUTNVu`i4e1&}_cRJ4o^f?4qn0o5Y7^X8v
zfX9+5>{`EJ?y+U3_kJc{PSb6k6;>Nt{{VGpG%Y!xl#SltHBAKRb7JtuF(Ub`FX^mP
znM`+wanA(R{`}_$^5KKoj2h#V8)<NK`@FhtU#if<>KLPh`w`%uH9n!F+ce;n1e$aj
zED8YtVAgS&QZ6UCxcAO`t7N&BLc99|0;HD3B5pCZoRK2@h@k7i6}DMv=b}$_WL%Ii
zc&cb0II9b0C3&PAo!guPnxofrc6R;cx5h>1ioHYw<Y~-!D~>5z3S~E^HD$Ozx41iH
zUODqulCQatqO&^gmeI-k%RH-&NEK!2onAY*{h7WqF9aVn7ttTu3oxn-c4<ALd#ec}
zQZhTJB@C;aVw!<qzyR3IFT~`IO)w!3xv5Kl30eUhtP_exW1RO;nFAgwL_FY{iII~>
zkfg7gBH5U5c?ZoykOp&#P{SEh#Ww;4&k|fh&`JX%3=nI?KU4h=HkUk?_DWja&PX0B
z+G3LflTW?2g8I#^r$FfA9M+9KqF7~Ut2QMyO!*F2cDdNtK4}yrM<w#r=pU)xi<3gV
zd&nbZwR4_4SDjsm%7grIUY8bV<%Z;OvSV;`!mOlC?Cr)pQybsEk)nx78Nd_;oYKp7
zu)?Lw9C2R7`fupayKgSXC!M5KI-OK;M%iW@GRWxz{+ap*Y9^tmN7>4$73;RqMQ;+u
ziU0!u3hQ{5ID#$!UVy2UK;({V&F9T5vfR$bEK3@sE=U`N5pLNzH5@}D<x=h1!*5kd
zYbFE(o~jGvj^ZO506`Tax~kA`Al>&yfL3XGg~zA09g-5(u7Bda9?I@3%XO9(46HL)
zEke~VVJPeUR2l}SaiLx<waF&kjsZR>##>zrINv9vur$rw^G$+uxP@eigDVaQrC>p;
zN|D6U9!KVo_gp)r7~%A5vIi9B+EYL3sK+CkjZqV^-3$tEoYRNqxQtK=7*}|~86Q>V
zU#gysTbP?rv4{s8xB|U9I5`yAQDjoA0SAH&U4t~R%XCD^jZITNLiGcC68lCozoVZO
zK#@6+sq@bj?w_q5hd#sVT5LJ;>IYTf@GQ311UNghj%(CuwGqu5M<Xsb2TUGCKJZm7
zg{cDj;{f4@iY>Do<m?%0WDx|o{{SGt&U5!#3Q268;DOPUkM{1gS}m+gq>Gnj$vh16
zR1K&gy2Oy~RTu)S^fmp$JBBKwkeR50i$8CECFpfgT?T9ef&~~C2nqf;p=1idhCAx4
z=p;$HX*Pcq$8-(cN~Mq-s63NJ5b+!wf_kCuQKYwjbK}PbqajCf5oTi&gNm%4DPudz
zACaRHIbGT@@y8*l(M=@4ti&G+#Zbrxa_Q}^;06bZNTpYWfjkdCboo{U43a6`O}1E2
z<NYR;q7>IE7G1<)b5DpXFe-Sdbv!EVRnI-p@Qtj5xBFErc%F^=Z0t1%)TysUp7u7r
zFwLBS#%s<008e!Y()u{aAA??%dt$c|i;#K6e9!!gjbF)8?mZo+g>qMFmr+}|WgqcY
znl0!t1_wWi0Ko*(9m|p^-8#@Kd<M~6Dec+MXIj@qyfH^SzUz=GR)R9)WBf;TS*k}N
zh(~QM8_p?~c9+PYJo~d*wl=QEi!HA*kpUZ=pQ@>~(r;tAGyA3Y9N^W>g@l7GESrr-
z`<WuVG3_n&RjIgKQDjx}g6(8zcKf3Sp!E&QHIqDQSOH0>tnFsbs&@`WN29c}*<y9`
zj2cMC`7pMZD9&h;UEc35r4>&VrO+B>k+(TuI?YKXoG`-P(gn!R71}V)R~*&3GDd&3
zQWj0MY=c~!w8H>`4H0c1y0U1<>|&(VtX-4bUTd`2HpP%IO;u4!!seW571lPQ)^eH3
z6U|&l96;tcz(07dcq89}Kxz|IyZx=Z6>z(IG$kh4BDVxd54*Wa=7)yWZG&vUCZW<b
zbhv{m;f;0Xr)ZK0q>wC)(9J}p)do;$&?Wp*ToV+1p3OSLOK2B9?oZZe*jh0j<y@2I
zg+I9c#_^E9RLX5FsCCQiE!O3tDn<nksU5g1$Qh4h(GRJlND^U;(kvglUvoH09MHXX
zEDHn1T<~hWZ=;s%+Qj4)?ySAEjdm|pS2mb`dbbfNQ|6l<$9sU^AU7~v9ZYX??Fu(k
zn60wM4oRyK1Y~9Z0G22l#K{L30Ml<xt~v}}=S-7KR*K(o?1~94ov(-^0Gw6rlnB<q
zgW{_1VOXwr9w@@3-HE=L9-(i%hEd|6wu6Fkim^kzI2jnK`6US}5P15gi*-U<x*I%r
zU_i?XC9dU2aLNxAHK>MY_mj;s?i{JdB9%KE@B)`&SvETD1I<2ppoI%YMro159D$S^
zH8C`zjCaYqJc_&E2T6fOZ!Ru9)3`^T1zdjT+k`8wC}Ar>D#wh0PSj*Fs+CM)q`q7v
z7XH9*?m++tLHNFqB67xzTegbaMZj_>OM89NZ(;KLsITM8Zf5x}Rc_euguCD-%GTyI
zc_fs!PH{osumOqjRvL7VExFv?3aYwa^0;$Ni<vNGUuJGuz1xP*12pEhiBl?aPiCjQ
zwpiS-<21yK1m&|`Ey|S@t_UE8G>|avmgemHp4l9I)X0dkWtyY7jyWF+Ki-OIzC-;M
zHbs4~(Vy;;d^Eh%^r+G}7#_z}6wFwXqmfG|89o4;Pmo*5kr)F+Tc2`2f#Rztwvyq<
zClr34Zz86}Y-75UQBm)ZtGXTRKwE+SPrBeQR$Zigd>WoRW{HcI$*6_ol1gF;k}9<(
z{{SP`(K(CVQ5=$Bnwl$iv>zA9sNu9`m)w!@j)g<5Ey;@zNrOqmp4$t!@O$i~Buo)N
zNTxT#41-3<r*>EVDOnap!wUCJS>W!9))^HGFPb)ZS|;cCnjOY^>Z_sCuyeu2E0_Q(
zSpWbUj|I=-r37K*{we4}^-HrO0=YpQ4AcnqIi|}X9%-z^FXn-~xLA>y*bEAdks?dW
zl|&_&3|El;ta|w@ZKSc&pLFanSRSe0slK)_M%Fe@^KJ_R)p*7F&2=M7G=+)hCbwxc
zPcO=S8T0B^HMTvJH<uR|H!X8JGf2ldHNO2c^~9QVi>K-?S_U`-o~s#gYC`c<@+-wB
zIR?6UH1ToT(a(liMq7`nA(r0e-7YQMGO)-X(uX^+d9NJ(GWDC<O1gFYnS}7HPgU&D
z7-fNFiG!mIh84okf#Q`juZJ?otsJ9seN)$WgVjV18wA%$xEu<gxU#mK5*4|{9R#cn
z2l1*)c><lMzq+uun<r_;C=sH}kjHQ|O{}1<)7@7iY&;x;L}!3fiU4$j9Iu*BZzFa&
zsO_c<Hn#?d=y#eV2bKusAW0gwI>@P@Bp+{}I;j%`^GM6M9B{&=6Ui^TpNE=p0~s_u
zx{%p^4G?nY9w>9<BE~u9rNGD4FckoKq~EyF0OG`sdKC-4Gft90j=2>9JGpK?g$m>-
z;>4Wzs@-NX_UuUUR*`|6nu}3H_FH=>81q4e9!sjge{#nhgF<6$fcdM<dNuvadt@K#
zvRC()fAti%g4v1#@@!REDo|?6;c7|HZQaDS_mRHrC2oB!sR@-?8-ZB|wHI(<#z^X<
z(B`<XyjV(*gx3;rj!1FZJq|dmnCr>v-}MFwkBQ9#bAAK=0Hjr94(_=Oc^y?=qL*ch
zD;!l2l&A5syjUXTO_n~P)txPN2hK;`VmHe7K>LNVXq%?FD&FZNc&^edhVAS!b6Y86
zg|nv|F~g0~>%XV^j4)}cvq}`G@@r83pwpANLw~(_c9|0kao;pc#v)zJd-0mB`mRVN
z6puSS3}rUkH_hsOB)-!!ulv?J)VJ_m-m^r(*F08BR*bB{kP=UUMB5{zAhtpL)X%JW
zajP$mh8U`GW|}z!T4we<V!Yec7cF;vv}A&DT6evYNE+X9+ABE~zrU-1N5y%q5Y6w(
zF<m1v#qB9cjv6A{*{sm7$c(ig>Bwf5;Y?(yQ_d=VQ&qOJgJ%rb#wh;)LcDW_om36q
zRn>+?DN)aqWTUy7yDNELZ@6>Eb#H5Gp?*YE8ghdl$n)Zj+As!4s|t!OZxCFnsm^F}
z%)|y5qqV41{QKKLBN+g|p+xD0z`4fJR(8p>V2TFHH%xt1n(oI`7)OQ3{8ABtkzB8|
zVwhO7W5ocCd@|tj4Q4g#Hnf!A+BU+Rwkt&|Dh4e3RCY~xr|Tc;H?p#68cSu54&sb4
z%Oj|uN^yd=de@}AO=Ybow-oAUkA_^<l2MV^CcZfI2ED3i+OWS#UukT(&vo?Ar#)|{
zYBn%DjkZ_7BlB8w=_iZ&Gvm}kW`;y?S(~a2fh3Vuo5Y9;fI%J945+yzRz_(9Sn={b
zQ{%>SLF&4UHc_+6!A3L9W8wWkvq|QTQ-G)7e(9l(Q~jt+nPHFjIyd<qbDDTKBNd<Y
z@2ISGl+1Gg<NYMn@{p#4qTuO<GmO>6*(!p0@kK!|7(Gy;OpXO3HU@K0B-*`=zNup+
zrOGR#jxsCe-_+klU+FryirO}Nd0XN@>b{VMa(F#cFYGOEkPC>_@%(Z+tr#^D#P}%7
zg=FmUkJBGPEwEeb=<@x5MS8ugF~p%`U85aQ3u=(z6aWVRQ~j_GYK+;XmRBO|c;k&y
zIwsmid#TY$JB=}8$;lNmj}=`G$0vM>d$3jJnIQ@)8D!j}2AYKx+@3q8s%{-qQ#r}0
z(2Ra)t_aSKB3o#owL2ubgvB8Fcr-|6T##^lQ%ja?c7EznIRNzLu{Mo*EH9XtiNdk*
zUV{vg%7R5;%EKUv@?X?_V@uIAIWF&HK`<Wyo^w>6qMxgl+N|eF)FcSSu^A?;ge6zt
zs&b9~PjzvFnqf5}I!PF1AgLS?Q8=i;n1texl53b!uN~5iIDTn=2AWQJr3W<`4#qg9
z;Dh+5_9+`9x)c%{s3N$9qLo7<3=nI;e^-40CZ9Z-rkxouvC9BDub@W;pwulPy|s8J
zK=HW21XhhcqF80Q73IXTMw#)Qv3RFG>D&k1b>zPEfaC$33|FE4s`?sD4&qz8Sj$^T
zbC5rcd8|weV2thaUZVzR=8oiX@?&sx$1(QUgM*(ned)N}pkfXOnxl#(xMQ`s&jPf5
zpSP3SWaxP{p$J(NV{E`MBzUMTV41;Cr01$4RY(p4XCj#e#KKirh5++TVG>B0REbxS
z)@r@;GD!sEXvYSwu3Wn1`ENB@b&?5X1M(;k$XvSyYg7ar{{Sk2GbAzxQMih?)NIvb
zl496n{A#-1(TXc9q_ZB6Ra2l1ydqK_bO`6F7jD-%<bEmlf4F%H$}(4hQADyuFf4Kp
zic$bwFftLE6*!mfj?|Jhm0f$!B7~4_edq=aQu`ej9-O+88^tS;gI@jV()U^-vkb9X
z@NZ1Svt6gT#t${_9-h1w+CbTo6jy}*04=AsmnW+9B<a`_31YZ#5ab#*4MI56+ds(D
zj->;sZ*1>w@+zupNo2eI!tkfXc?idHsc?FCR%vWKsR3-|jYyE(B$*j2Lulw@k2^`H
zUh99goI+5Le1Tb9Ugo37*4R+JxLG~DhaOEx*AUwedSU+b%M1HylXH3dr)n0_>Jj^V
zB7N0Vw^QJ<f)@F1p&rp2O%ZDdE5OI~Rn|IRu%JgNDk#Rij>l?9!!5uDmQhYs5P0Bj
zqdGt~zn&;7d-$#986yUO^=Iy{J)v*$1-T-T?A8l3-b=CUnzk6yifxScwM^B$%FAoK
z9D+EYEN=jm#z@Y3rfH3vQ4?{;6f4^-H@@Iwl6tDPTVRINnq#&3B}tXzBxmY|yVKiJ
zv@w?>c;}k5nlF8DM_p4b(c&e13S~DMuFw;;NDtHc5Yk{O8+knQRNkY|X1vtmyGcsz
zJb-Id*D4rB8zj`JY^{kint19ar@zS^u2?0Ci)%zuGC-g&wTo+c{ips$u5MHn+MU&5
z((I6#u9Q1>%_JOmT*9gH#e~~kjnZQtGf$pmf<3VoZ=fmBT0uHuFM{~RN&9uS8R3ZX
zXg#AhMQne+FYb|r@C{m8UIrUwU8nOyLv+nC+@~4(p|yLjZ;*$YX16+^)%G2KG-x*l
z6dj!FJGOp8mMG)8wE>SLQrlats3AV8IHkiDEhWIz)mGB<L$H)5{wTdw;kb=ic*v^D
zD`PG5yYtOiYBs<_ls+lm+gn7ER;*wuwk2afJXUgRdxJL(!93N@maVyzl7G5&^gi2f
zUljP|wbKN?FM<ngw)U)~5=f`ni6xANY!A&w9h?jd)8+Ue!No@W1Fl%cwGlaBO);8d
zAPdN-Bx{zPLNR5}G<4T-MG(qh;*Qov*s`^<Mhg#+?3#imisOjbP_rxE9RC0mq>9wr
z$-Xuq52|gcE?*#wEri@c(E=*D(WPRT`5H3v`O+Vff(M$dj_yV#Wf%gV9AwceYqHMD
zAKOGyLIE{ds6y{Q!>U*e*VBnT(|~%Z?_1b$xQ;2}oF*jxe3{*1H&2UIe9jUPf^$|L
zrnqKhXxD>Pk$lu{8Lpl+@lBO0@<fv~k_ExX-C80d#hMvWRN+MrcQmmuQOG_in%?OS
z=(f{?NUeiTF|x=`X}T;Dg!jqdj;U_s0I>(E&oAFY@c9OP=5e$A>Rs;$EuLz&E4LH7
z5s~7oVr7vxIULoqi;rN7S3+6bO(|{Qg!`h9+sBtuH&<GSoRrAhT-NccWDY8sIKDa^
zd|iQE++Im25UB_cCatvgNh1M=%g$;TZV<>Jz{O28kU)0^3;Cs!wY4C-yFIO>b#b+G
zQ?Mik1D`a~rCSE3Ux4SD_lJOFZ0PZnaaEI#ZLoid=+V-U?tD`uRGD@+1X8|2xBy|1
zO;tImq>ZCHPhWJ>IK?dwV@Mg|nC-A<HPSFqReGiBK@&T;01io@-32QjeRz<eW?(Vq
zysz~G)?1mZtluQ?SWv#c^^~)2w6TAP;g}l4+mGBZk<Vw6YptR5_mk(7ucI!bQ?uh9
z%AIsP%3X;s-c3}rpu!_w50hFgT~YTAvF5Y4FCO1*R}J@FryaZ;#@p;z-T`h_M9ZFy
z3w>m=stDLKpQ>|Y01Y__Mg-^k6eSx4l(!H~X+FjUSXZR}nR?}|HtD*XlNsSy_^f61
zyGTQvw(m7tEN&)M91we~dGIO5nRYy`4$<`mvn9krco;H~kVQy$59P_PC;d3}sj+m^
zZ`_#A3dHqZu@j`oCXIt383b1cJ_%x#GqHy%$gDs!$*G}~40tsPvE6~qJwe^hD?JyM
zFi07(oC-b+uRV&VjfU|@T1t1}j};ILMcTvT-ANM_IL!?i*mA9$)Z_;Nl=Ds4y#!|*
zgHuL#FXpRO@GSn*Da}};e0Kps5OZNy<SU_+DdxDo^&kp~hTaIG6iizuAb==&uAx`i
z?tsI^QhJ`^_A63tGm+ja4t498E;n7PjP`3jS<Wirth*LC$<t?|>euaSY}26KipsC5
zq?+I}9e^IG)|#|;wxt$TAdZQu4MR<oN@YRu&x*x8Omaq~_OnzlJZ|dIs<`z;tpWB^
z=l59+Pt<yq#mSCIPClw=A8r)YW}^s><Xqz;HJTaR<x*#3L5C8TD74a}o@qnOP7mE_
zZ7i*s4)jdpyiht?B=*TD#t898#}ldGoL4tdmC95;XBj%G1G>`L33wOd^+UsN8#ZKI
zIIHL<AmH>q>AHoScM0!9g<;tHs^K<Nn?r1lQ~}tjXF|a49Abj7kgSDUf-1~7l0NfR
zLGlirnJrMF2aK-=qxD4THtX!5AM-?NmJHH^#@x|1KGn1*aQq6{Jh7E4IuppH1d(MN
zzSK&ik|{!&{!DrLrdUDmDSqi^xL7hcu4L`IFgm(JE<KfSNEs9j?Z{;zf7=xt(@8v}
zMl+h08zw>|n1Tn2DnDdw9?~XMX&iPcgI2x$tnBD^)|XYfjUXXF7!{U-Nt;)@-zG#r
z*Gof=c_nM3E_p^P<Wxpy)h@8P37$!<&!lB*77#>or@9X3O}DUwEMpvaqP;B_yLc0v
zXS(m>#-q{7%aP3VO9^BO54L%t-s#nb=*SehMX-)B$!rfbE$ZMCfmRvONvhtYj8P9K
zx{AyUfly4J_iV%fe-$K04L~Kmq0brWj>*S(s;dY25l8PPlr$@2AXIb8kpk?&R1A`7
zE+$B#XOY6Hj!k(#>Ibbw!%3&u$U=?_1KnMN1hY$QMahw4jj*rOZ&*B`>@>#Xr*U3Y
zE{wO1ZY4Qm$tJBV)=A{t*={-f(W`D$<dCE4y+)f!3^Al}b7z)UY{y*N8)e#yj8%?@
zt<0z`mA1MZim=pe35@VFSu4%f*FNI#!1$)ijWtCQR*$8BPJL9Gj+&O!IQOF%IrCbX
z?&87885Q$yqP5ntx=F(v9_!HN^+-0R;7P_h&3VmUnd6roGpC`|dy#RUXsrE3^#qr<
z&uJMbs&S9H#H4CuB0_(<BV4>|tMhfYmgHi(1W94u4kYpobu_q?Ae9_FepzJ5+IE6;
zi7sp|*5U_X0q|?vy*H|U`%H>QY_>VC5)!gR075?VTfalzN^LR70M=ho#+($9tI_hc
zHx~FkEhWwX{{WhLXLehvtg@HgT;`2d=o4IH(i8sxt1396B4T(o0un|Rox5+IDUhrp
zXw{Afbpcl0^5YcvWXPwww`QYO3!xLdVw`>hj_I3-=9<8sNuUuKx#pXUxFBFxC?&rY
zGi?ixG)Qp^kIf*;woNdQ!vnEQ_enbsH2_G?Is2^7u0360>1{4co0(VL!PuGSH6O3N
zW2bssOOk2iQ$EwNbIxnz-(LE&Q1vdSCB@9lV<hcd@C9kXr<PavD9xE;Uy^Us4_azI
zr_`jH;%4@<@J#RpQE1wg#-Do~&Af#j4nYR13leH*6qe0)qlz(()N-Yf%2s`g`Vac2
z7PVy_m#It^;Xn#l_g>VlugEzE&3<|MZuaX<ypHPHMoAQdkZbEN(Vx_teQL@(3;9Cb
zD8MB5T+L3I<M}R)*fVe8_IlHdQ+lVN4l)QLn*3Hayqc3Zwm8LcBRo?x;MXrEkYVA0
z>Yb2A38;v{<El_|%}XHcjBO*DYjAN(kZB3e6&nfz)->yFPR2`1sKdtN9E$m`^()aA
z9-P0HSdg1HB&h?c`h%R+Z>V%@O>0k*(&{kE!vK-hYSQYZf?tB1*j7Y*bDg6KcK&No
z>5GH=a}<ra8?#USLG*3Ur}cM6MmDT|3<VFMMnokPU@zTzj9DX+DoEr?E-sm6cM*u3
z@`sU8*~nf%nZYKEk{6oaJd!+6ap4%LU93EsyiohN_Nyr5W4aD2S>;D58FUND`l2na
z3r15LzUWymZfs186K8nFDsx0LI>c8ow)wyGM+z#nrg0L-yI@n1MqkY(%<Ub^kOAZB
zf?wNy;P2eXJk@OgF4Uy4vrIR*92%;a5XHgC@kTAoaWPA8k#0HaftL~76Dj0)6qFEq
znB;T?*SZwO<+h;CD%R<<Z@dFlhEl9T=bq}R42xX?PhrH4eAlviD*E2*O<1rC*M!@<
zf(Cf6QT-((-29(Eit<0?x>(B#pOktZM$Xjs8=XlaibWpSUOuUot{Jd8s|{KxE+$nB
zH)E=)yF;gZ$H{Bw&~~fI>*~@p2_i#mZ~<aC6f|%sjLmZnN&6L9r{DhNcXPquR)XEy
z=G)qJ;EYu=jG)I<xB-DJ-au7A8LK&wJ&Y#@6<H>gEvej+WE0gNZydhXTplX9bL3{L
zpp7vZQw(!J`i{ywhDXT`r;16hrj5488OD2};dGq=anI(Kr3Ywkdtj>r8+6<=flj-K
zm;&Q8Qy*<{w2@R-y4-MTA{SsY(58%KK7kbE%X=j!XdRCgHT9b|#|?v;rH1}xy+W(-
zV^?~G;zw?+o_nQ|lnCPNo<&$~_Z~<URGNxP%^p5NpAG1oZSOP@vS_awb?LlOieJbo
zDm|3C=BIaUs>K{Ivu-%eXri9sEKIIQA#;i)ZDE;J-X$zN;+=4T<MzVj9x0_fZ6udL
za+6e3`eRp@Qn-<1LxniaUfkOWSChpBW2PmNv&1$Q<j`8({nQrhw)ts1QId@Ed=(&D
zenXjmZo6V`X)TQNONq(aDzDS>-bo(&wL8BxYbE?j+d%mb6ekB4Lo5EmC6?0N2_;2A
z>@DER;Hj$nn>p{BbDfFcQC3#B7K7mV6dFl)$SE)ECOa?NrUPc9l6Z&2;2x?;F578V
zT$5Rg%ZcXIRtV962b|G_9zsDY`5IWQ#A;_&12rI(F5=p(HxBB);pC3bdr80?flc)E
zi!?6rf)0Gt*wl9dRU*pX-Brjq2CFsO!E81a;Pz{#zm_|}7SdbnS8A}V)~DPVBvFSF
zTZM3{Wo?b}y}%Lj4OqxhR@t6>&{}PRLl#%O_fot}A9|neu9np422xUFvm(GUhnjIj
zo(;{6{Zrx)>?T8#L&&QxL@s~Yky~PM(G0tg6&?5jrG=es2zZG!>li(pyxC*<s4w*d
z_W~lGPc-#zkZ-~(ohFuYkHVi6MAm5(AZCw_-YH@d2l;nZ_pU>2EyY>Aw@6PkL#BW(
z3MUz^)ve1&3%6a-5&JR2?x_KVGHDj#BPU{=N$#-CeHAzGtg&S<Le*B~Kvb_adnUw(
zfmzK$U}q|oyA8ey8{~Y~5vmQi-BHIH%KOO81|pH9=c+Kwi5}2-tu8dF1tQl=mA$t>
zc*P3w8Kr33IH@#uP#Jq9RHSW`lUcfxAD|E0!5e#QD3~qbakRDop|G-D9G<AWs;N*Z
z<k=}ASmS~)L~YG1D=W$KI6*<cj*eeEeNjs+^JODkRc<frc(vp)R>>Q&x}eo8KYIqm
z1%EU&T3o9yyz&y?S*vSFSq9lbA4aN^*zPXFw$E~r0R+&@#XujLng`15W14w|TO?P$
zYR>`CD*#-0%|{}D#~stBQXifw64>X>C}?bty>_>BB#dJOANivW8F}uZl>-8fC<QW;
zRli6fXLbspa&ul!>pxVB<0Y-Eq$uE#SAVQA>lX0h*fF;~kzP>%Wm3`}Pm^6QPSQss
z-!h#}p}}p56p^{fCpkRi)ZW{y3KpV}Zex;1)jxExAP1w>dN`^%P4W$P=15s~;~r}_
zGJli4L9IN>3Z2~%R2L{c)*`E)U?IqM3Tf(@;B+Y?2m#OYrjq4?kBR%GAh9!V*x$yR
zsKXIo@sE`*(yz(S=A<f_C%!3KvK07qJGlfN4SHAU$E}P-G~Ge|2Zds5B(~AZY)Al*
zI;t7N!#BhgV0bl~QGz)mwU^<_;O!q!$FblN#6ec%5lHS754!UI0Mh>eSzIwqGW#sO
zE1LBg)G{k4AdU@r%=o2>RL<5ciz2i>2i$q1Rkkh%G!3u`!_f)3&H>`8T~P0^%Xz>&
z)r@Ww1_r4|!LZ!^Xqcws1kmW6mU&5Ae;dB&h;A@;H3J4V?hQpoq1QO%QC1EE(*<`g
z2A75hBbpu;C_Yq5fIwm05(Cd}t}x4$tp2s4>9-0)g$EU^c@(K!c&sMBWvfSi*ARwu
z@C8?r()f>#o8--2=#OhLHjKmT6n>buW(}7BpYEg7JxMN@g|!PNMFaT}R8RVsq82N{
zn&j#+XU@N2(#Je_`;l*bXCg6@HOl#*nUU1TDE|POOD|mM6E+3He7qW-YmGBdJK~)I
zKSr^vQ^)(_&2h^X?hHFixr*K&ZCioWT-o2o_*{Z(C~Vjt@yrPR>Cua*o8n0|MDeLR
zvE1*oaFM}30r1ro=4_C&K0xzX%RNR^;!J~6>l%uG(&l-j0d}7?U$)z-CD$a<Ucqqw
z><*=eU>Y*g;1Q5cYv*tO04sG3H&>nQ<}DWUftu9mpQ=%;c)5V@9&53e&~*?>QRKlN
zRlz3W-5$epCV!Jvb>^*Nsx6XbY4gg1UPbC3)fw(B+8Zd$WQ=aCR+;Kc>s?)Mnr3oa
zED5W0{(+WJZzlzLeP<RnlsP`C(l1Bt$R`|7_b1+$9y3`lNBW9AYSA9w{{T}TC9R!}
zh4sXXwlj~sS26l_c+YEH8$VZx$8vp)Mpzf(qcqo&dykA!xA%(G>0Xv;BoA@Ud{(L-
zPg=unR7M6zc&&K7CAWDQn<uCx#8KmOSlnF9A|Qp&CXTiCY=MOEMQQbWCAPS2tASaa
zUKsgy2b${R#~f>HOmfL7yJbGPb{m8s)%WSuqt5%>WAj<R*~c`)X(yWW{{Tm6%Wq_u
z;MRnd6-k$n#@6YTIiNK=tVT!rk3x^Um)eDndKF8j#@9CnX+hhbDSO~srC`!pOpyES
zl3IpG;~r~SIx9vKf)5ozf2YX=rbPsysU!%>`yXNsdcdNtXO7+$mS+hXjFL@Po2acJ
zX{T_JsL3SPjee&7ph>99CZDId5Glc6d#1yNS*5li$&q7L%YR;a>;9v0CYz>6L~3x%
zr@F>j*}1t^kQNSn)jy_;Yjj>tU5<IJjBA`qqzrXlqe-WZ7*aTyvr8<kjfOT!OmVI}
zAH^s_(;d!OElX-hk(s$;P%Ne<lxGLAn%Gh@{{SR>#?U%7i}e(7@7$#KTDfl8U{%Le
zn%C4aHavGpE>J*pi@$13fMgoEyqf8wQ45fb*sR5vDJrs@iqjdy79QzboN-vPMLx{g
z8-&!Cl6zZ~2&*GyhZSKIYdGAEwZ6q>?Tbs9oZ~!!RyKDNMx{}<gZDu*E>%v=KAops
zU(O(imqFWB>qFCDPqy}pWSY(Xo82=?(3~9kuR%8Cfq{Wsjc!>vZ18uzGXxYCMcFQz
z%oqdJUPb``imS4>C`TV3y0wU^j0)zob`vC4$DCJk&Br2`S9c)dn({P!sUMn&5wNBB
zuN+i~CjS75HAQ``TU-ACIR<{Jj`P)Qgo|`P-Cd6i=h8A})ndY~ayK)Asu@{89puoG
z>K2L2vo9Y;yz1v%)b5D}QUUM{3Z`etcVbWGwMWzE<l|@m03pSXh^svny+Nj?K$j#`
z59(_H#`BuNEG@evnvlb|=B-|z`8iYn04U1e#wgpz>RTM1O$o31mJ3ZXON&A7&yR}M
zP$yvZToVHTXZxu16v)5IGOyhfcz5b0_PMNTQe0oGyL0hea4Qb+F(eRw-oB-|(`@eI
z**&^8e-)JV2kFM0bd2_DB9w~gV^NBvsMU+qp{V&bcnZRE)gNhZmRyR_Yrd4e(q$_&
zO|%{x6&0<fBMes<tCUTNr_mfRZ3AfTuQWT$jVA6pxFltCIpEM}iWp#EQ85)fxXAjZ
zloXXQlWE#MrTroOLbdf3ids(LZjfMtd#^}td9Q_ZO>*x-*HZ4ynlP=L4>k3N>1VDj
zJyE5g_f%Y}fXYub!PRM=Cu^gngEE@uvu@m)On9j={{S?E8phG7F`2-o<b#@%{i%oZ
zNYKfSQVl#PJk&}se-$7&>WU*598wN=G~>wPo8J`(ieFY}7P{u1_VdOIW8&E8*MVz#
z9oMJzDQ)JAl_%nH;=Y&%!KyD-`g+@1v1MdmbPtPSw`jF*45{>I&w))^?D_g9o;*mo
zC_5D;?Zv#zkFWv<BCNebV=jp!+fKOWf=6|pEg5{MI6iCCW15;$M<z+9OqNMo1>r!0
z&MLlKvB_ddJ(@ZdVoo_jL*K?#c4HZ<Qb02LuwioTjEW4XNDkAS_e5&OZ`?E~^8omv
zBV(R^E7lEGz(R?#Gb(aLMzV<;bnD#_a^f}Gry$fMpKk{k&lIeK?xU0T;lcR$s&lwX
zPm0l9I;E2C<N?5{8;4mK72tWQW-<)6jy8xs?4PRif723>%p0uNm_aCr{97dWtrw+r
z$)LYj3C2$by#D~K>9S_i$|&r5W@tkyz7J`V$<51Lti%e6R1!YdV#2Jnq%&y2-M&S0
zRRV32mGheUhbgu6dzd#k@DXWZ7F7`NGtFIH2DG-^jg}*ULD@=(p5ygJBMZqvC*nG)
zGD)|{Vu0<hF67HgAN5uimlrn&+xGCa4b!}GtZ1OG;89WCCA5Dm0(z>X*MwhIh;FXG
zZ8E}Q&VLm-21s`*Z8+kU3{vJ<ai0047ty=>ap2>+SIIXnk}9;_LTRra)_uxB<Bt@#
zzMQwZzgx!bYS>xE^83o5Ki(>7U7}1k3yObr#oRa*a`-94AYB9lh3c#>EtW`tE1Y<s
zOv)xQf(quW^!tdeKZyo|ZSn~wm<5z;`+~bR?W1RhKgX(@YD0F+Z~>^bNm-yKA=lLn
z*C05yHd$5(cp%gk8kAE=K--Z*EHYkNxc4yK0Yd1SG}aS5(=l?!o7+^|Wr@WqW!?42
z$N+#%MIF3Mh1>#alhqdYv&{=yIZl36d#0jAWQ#nToDOPA3NcblTi^~^#8ZR|@S?rD
zN5kzKzUj06)tejC8e_oH#8xBy5kvAzT~P)5&9iJAocN-x48q_1o#K++4ZJvzWxSI?
z&#IK4+XzVeqN9BUw*DxLnW4Pg%vclE6tG5R_nAO?k{5z}9ye2_+Ne(8Dmj6!XcJJ4
zS|S19_e5zBdltmTFbxfDaSZabZbk(e4b{9ddt1WistWoZYNJ4ROkubaY|Rj5j(D<o
z2Bx}=WkZY*eA6vqjbI?2MKz{XPia{!hTVu!l_raV1$ceJbDtGKu3b#m!U#vl?yRAm
ztu+1fN!Q(9EN+wg0^%fknoW(citQ@a;t0%+a!-mT3%KOB_Z4zanu^wGO~En(7KON{
z8VXH~xt8JBrrdsLXl=tv70yYkJFB~X0LJgC0ivCs-5(K62J7gZ54%;hPm6({%~P^G
z({jgX?v1#&X&-wrC+eJ$dt-AgTLzLVZ^#wQh=fD|E9B8W*h><~Fu0*^@Xf%@Sz7-9
zlEAJy7^5dtHGFKOxRe_=+sk;Y%Os)85uR&FaT!UOKhkQd?RHE@ir<1&!YOH3Bzw<e
znu1$-J<a2qpWVSDBA*`YZar3=&2mJ%7P?dsrN6ja<%(qP3Ofo*2h2rqYm94?lf`9!
zy|zD!Dx(rc%_+&r;;yY_iJ{3)krV=q&$Ap+*7783L#Y1g^2*U89(f{)MQe-^g{uW9
zSNsE?#Rjb%!7Cm!MOtFb2ySYL^jS!n-YMhxzsj3-?8%ZbiXFA6OB5WQsMt_kLK%++
zk;TWEAU5A;vpTxALE?ypZK;~CSf!faN$-^dwWB4AV&zVL>)-UO@HcLRmA$}C2QX!C
z#Y|rVnrZp*;+L@WLaa%t1{;b;LW6-!Qfd%`Qm}&h)$Oih4vaC+itvBbZ${l{moISw
z7qxSdfz^Er76>7zJz~b*`pxaGV^@ubk=1C^=av~-E6D{lO!-m9VhiL`y}igjS;van
z`uoxIXxA-0yk)J}<PqYrH!Xq|A+-6gPlG&i$8=6);Na<z$U#DkIp|dmr3{fyNv#dM
zKI0a4#ww>=B(T8bc4_h<3Wi2j$p<E-3z&}JRjK>-?I)8?d3T?Z=Bfb5*n2=Sl6;y~
zh&Rbd0CUYE$VOLh70vMM@Mtm?Iy7Qn;kX}kw!WO3Sk+SA(Mt{ACIhNF({|SyysKqr
z0Z@2n#d}wzJvnQlS*TPEF&`%LSaq7X<BcxOT6_;9{1iHO>GhR{j;R`t+<~3nnr@4K
zHJ#DZ^$dUx6^(C?$l|E|OQOYn0eHsQmyktpC6UeCj<jH+sS>m+?T73hMK&z^nFjz?
ze?--1wYdKPr)nhdaI9-bA`EVioc&TYT#lWME*XwFrG$V<YKfK51Bz8{v*wg32iwY3
zyT~<V17`#pq(H-(ItdD6Jm#VyZnCVu5->+p^>$X;O#>Tl<Qy6;95?{?OaQP0u0iIU
zpkw4S{m_A#2qbYuBL`fQK!snaG#f2vPL|T*CHamR4y)y#)W1Vs`eRam@iSX7%D5Hv
z<V*u|)l~YoLbBI%*)1-jRt!EQj;h=_UMX0aaV(0DiHE$6m1ZB+9cQU(5D-%0LHNyU
zeRt@~pHJ$q8^%lo<y#&rHr9ZgD%WlhhG^et<w?&JtD@&l^`5P20Asy>Caz%pL%9o?
zBdpXG3}PQ14K?)kKt?#KGI~auZrlr@>N<#ZBU*iZB+LFNxvE`n)VihoZ3{72&w)c}
zmIHWE*{c`R5;ol9a5ZKxNYi3_qaI0<)O8sZIT*)<nTgtasNUF~LYmjfrhvz4@Oq)Y
zVnb~!jMmqT#b!!K7;%byg@wi4%0UEd@-e^wR+Y4gZm#3Dw}`Zialo&1{U-f2w$fuw
zM^cGiaxxT-t0to?aI0nbaLXg4Xa4|BJrAk+cGfLYY~`>@ek<q?NZi=!7OG-ZSwQ(M
z3ik2I7q(Oc#xaVcnhTv0_+DA(+t0`f;Lkgpt(|z`QH>+i5(QQ0Q*I>-9@ACY4z~uU
zWEog*$H-H4TaB?EXf_Hn-lW*&i0-oYFs!IihI3Z>oaNktXegp*ITcxFNT|^CwHlFD
z<P*hu4xFtDY*ATAVv^rcjFFAiv#{nJ*~zO{KxJpBE=+K3@@hRM?kQkYQUebZFRb?^
zxsxOTRo<E_J|#!_4(K8^X3pO&jM2i`C68+igH`%$!JH$E8oq@_+Ll(;9v%Aar|Wjs
zU-m)2YJZm6@SAvn&$t_d$*;F{iwHFfNiD7;4;zdSMSR`*r|3(a3TbX_A}wmfoPpJK
zJv%%i_A?%<9HsI+>dd-b4{MR04Q({>n`7=V1dla}wV&INVR}5)lGgF2jB|lq=)VUw
zme3O$5F2v+)N%;sljL@hP+Lp{yGKTr>4Hg~>cRqQ1!He*f0a^d5#d{$gWXu&Ke;K9
zdtU0cSwpiS<eEl8yGXb9uQuc04QOxRgs4St@u=pjbZkXDidT|&t5>j+MKQT58nNW;
z#hPC}d=-|8Ww_d$sOqf0Y({a4we<f0rplJw_~Rml)DdjtWe<`MREbeWS?+vvAL*;M
zv%ZL-##@^9yA872#VI)~px1$Ew{WH8W>OSqlU~oE>IwTON;qZOFhQ<Xz9}?!y(Us{
zc50Cnr_dDH3dAx<3-?u*I@Po`<hy`z)noNPP~Bd+jut9_);xG8mC{E2Mz0>8H#Tau
zEq>n9Dy|v6g=Vk)L3wmYLQ#JeQn48yd171hL`M!6kzH&ap9pg<PcN<Y$<K&zh65zc
zmL-pz)Zupw6#!8&#HyGAq>ds%<GScW5?vm9HnLKDutb3W03a%8;cVuK5s(0>V~D16
z=C0Utr=G#Kk_g8Xv7r=faqxaL?`3;YML{Dz_jUqfIqJB=G4)nn%Iw?^%j&sp_ly8H
zRB=o%bc*j|YUebn2ss%)-5SY(w69egN0Y$7r1q5;V9fFK%DekZd&OkBiCswYE6#mU
z>2|VrxwcYaJ~7pLvqYnp82X^Dqe)_Dr9f4<=BaUT<sOSiudXC`Tdv61l_2;OmhKdM
zhX%F2vd~`2Q*jdJ6Zq<}=a(Rwu{%e85>i%y^;vex3W3j6XXsy5UwUg;j^^G%2I078
z#Zqe7+`Q26h2*nyNs}6yrdt^}yGPtVr{1x#^*)yz(MrDXUz+Ed)#MTRuZVw6y<L0h
zt!6o5mA#_hCC?T0*Q32tVXx^C-NiZvBjgF{xEhT-u*>juFy&bhqR%wf1d~)chpF_-
zTlInNR|m2UTG-#(UoPuumHz<VvbeNdrk@Fupd*55-5hwX!9FXpIz7^kI8(bh6%c>;
zsblXo5e@?<^GbTfDeoefPHGU!`uEb%>$-dGP$yB9HQ{=On>L+1me*0Tl{n5T>HfwI
zVt-M6DITw6^IND{)Mt-1)6i>P7q(p)br?6RU7i~gGN4uHo+>+sFZ_te;*Wbqv%70+
zkXK>HHCL%@jzUNA`>#zFq^%sLsG_@BV!V~23jY9gPjkEX1W2SP=ZdwunmF#a!69IO
zkTeX9?nunz8>wheSz0pcV;uE^QBA%n4;csl02MdwT(NaIQBhnoHZXYeO2SUz=CTKA
zIPR%06o>x+AN*F9#J}u{h2Yga^a3<#l=Ggdtb-!5*y2pL2gPaqC#N*}gmdsv8qCPS
z)HXJn(E3ACds~n{(dN0npr-IkXJ*|SkGi)W?_+oST`4#$F^Y|Z6UJF^81qc@ywG1+
zpd>IOs-IL4!#31Swel>ntgSW4^c)Z2!BKT{c^e2(3rF`?IvkIHBqUQHWGVpw=B(|Z
zhBXcVrOPF@sIzfA_trLpb;GLuTA=;SmHAT?ipmcdpf!yy9a&GfN}#UNG*~RuM+B!I
zbxzECZuuTO7S{nqxk$NuU<#|!^!xa(4AK#XJyrBpt8TvIKJ$ugv&!-xj~U{s-?ccQ
zqVJLMPwf%z;Ya47*KQZ~PSItKIK?KtZ6x+g6zp&^F+u2tNo_=T3(9aw^FlF7JBi0=
z&uA%qa?%@+$rwCVgF}d{j1-VFR#!<1+p&!R<G`S_?5pr`R%FY+r9pCaY<2WEO(6<+
z`>E{rv;`lN=AU}f7_yw#X@hKkBbGhYTU843HmDIZZ#P^MM-#glb?3!Hc{C|D!;RIQ
z*7aq!x{XsMnDs_D<u^#OD|cp-*!KwxIH+zD+Cmg&lE&uZOJ#OlqZz2|^=me`Xxc!|
z4lz+eUqbl-lS(%-d(ukCeu1Jbmg2}_Cie&Ci@uSZ{{W3RG}}Y$AM%droaIy-zRpLQ
zM%^g*k<~(rY|<D?jmOPNaeoK7?ZuMW`l&7LV%%okmo+y!r@*wM%Llqh?c*#s0)&{)
zac0b(M-+YY#usa@;q_42M(%iAN8nM9R!!g<9hiknPm<)-eyJp@uEwaO#7}jrk~qR1
zvsBm77MVz2HDq%9n`2}7GS2IKc0x$cL#nOQrkZJ39EJSVutL^&Jn>a1C4%Q@2WuLt
zF--<Dq~b?&Y6af*9DUR}v;uophjGPQ-X<-J6V57@!hdpC5&~2jzZ;WR^isKG`XY;K
zAdb6;`>CY4jph-7f@zXnKnJxSEPA0=PPluI+%x0oQ#HBnkrGMr81|7YzTxG(P*?X-
zFToE#ihalL1c6s1QWRLww1X^aDEVPr5!lZnpYtwBsV}Zjp<ay@YWD4H&B0(Ql3dR-
z3}T)!-768a-zZx-B6-g6NU6l~{_A8&L|=6ZK+#4>PHJsFW;uz5b5=CG2HPRMMP|tE
zvpUo4pKAA7O+-kyAP?edlT@?@Qs<nC)2C@QWpb|tDn#-=;&DW^!p68bq;OTjii&%1
z=NbO%e-E%kwbNxT*j4zSH9U&D$tQtJqO>vFlq<*`)E5qL`*}VqBUd*?yipVqp}B5)
zqFo+ifG{eh?Pc5@8x5Xn?!hH!279N<l2(|hv9{1GO_;N~oY6}wpLmeJijV^X?(VrR
z8sW+!{{VGElYEaDh%Ic``*En<_fjwYlK%keWPMZNwOHGJa>lFm+i7Qy5lB_P6ctI+
zd;z-X_oT@`9t{$b7U9kfGF0}?SE`JS<AYxFqrpB$-NBK)D0FOb=7@$}prag$LvQX9
zQ1eEwpkLU<=d)4E77pRnJ@-a-u;!FGJWwYjDsVY8qYc5Nl?fSISccW-x&Z^#wCFFb
zUI~ygFvm6E->5#0xzTQ3co4O_9D)Z`^g5pgfz|ZeYx`D$bPlXBf;z1_ZA7q2*F||C
zrm3GRjln@2eb5@%5yBrI%Dor$1JV=e7cDd~lWxb14yyyK$BR+8bzYMmXy%6KoXO*G
zbj;aVWMSZVr!mG593Li}@;|rn=o)TQZd<cau_$`{qa=}m)mnOA)AyR~>cbRlBM0Qi
zs+-cDow(NS)+p74Ir%kvm!|YvT^h+@w?G&UTgk3&vs$q&vFz8U)4Yz~Lp>+y>rE2X
zV~#c7kCS4rNcnJh^Gpq%DILAP_pSt{<n5hU#U)6aa_5feRX1{K7*OusdZJK*Xbe$$
zmqCvD29hM(DvW|Ei_`k6+RSwAM=HSKSl6Ja$jPkls&qJS;b|dIvna?SnLi8+S=6xH
z8zaR=iGh<;KAhF0w75EEr;tIyvsSWLM!@OLddHd#P;3zyvIk;{-G%z7++!acQa(mJ
z(6Uv9K`B`?oQkl8#GP<zl^+e}jkdTT;B`?1xJC@S9ChZEehq?hv@A`$ACRfzY>kco
z05k#=q%h!9fj&h*j8l+uDq$caCZP(iQ+j^KU9os>VhnM^V~}gWeO>7rjT-hubpHSm
zZ-K{(`bAvi<ndJ=r_pV-yC#ZN8?o_hby~E#r-omGjQAE+`95ip8Xxh$j;X7MoFGz3
z6{Gc5g+7gb(aeljJTV+q2$f>E5=Nk!^tk64#WZne2Nz5yPJ|1vxa+Z3R~Ex^jh_?d
zsVyEsb?%-rYX0Ksn%Q!=Bdk@a(i9HrDEDj&$Q{*ema^Skq!#F2LU1rB<Xel|m4e+1
zNXHxs^^ZXMHa$Kyy}Urvo<Jk2!>rZE41Xq^7$ued2>$?0eGd+w3Sa6GhPXV8j;qmQ
zT#yuC3Su1LF@Pw9Rs*b8nah@0Wx1WKc-A#YctS=gWr1Xck=G!Ka%Fp|8NL~+hB7xR
zb)jDV>fMjDPsv4Ion8$}%t;fC*!e9tQ;BAQmBvA=CC~0O%i{18?dOq2I^mJV`7dvz
zCQ3g>0!9GUBHl-Ifz<QlRlbb)k!Y^PW#+SB8qH^oeo@rg9D-Ot9ggbXPxj}x*w`S7
zqG(@B)R-P2W06*-Bz}T5izu$G9vN_QD+6w`XxgEVotfgj8dphU-S=64QdoZAhabs)
zywGEy>l??sof;vvSCVU4X)tsGvc8?PXl(#F4O)vR@PmpBt-E1iv%#wEcT2X_EM>I0
zj5;{SBCIi8-v**|My&Z~^*7M(>AUBaDA}#tk_a7ELrb|Izzl)aeRJxK7SmX;lGf@l
z!G-}Ns`;zdUXZ!;#;qVkEd!jZV!OHxZtsNqIl7D+eo+F5kT(O9^<5?0Xb+b9pq6zH
zh9yQQE15HLHk^azy8DbolFutlb`B3>qG*{IxTRtuA;b6GHfe!esmUgVkhS#6+nb+y
zasVH7X?=Yezg?hvK=)Y@?yG!B3+Ak#mwY&4IIObl=jV%ztly!AdrxxC;0XfqD+PS=
zHlX5n=;DfpR4Es?4gnkzDz@kuXA(=1=QNUZtgaDrl2lx`r<QFs?pkfTm*dSd(Yns7
zWY1wSW{FM}jRLx?lNGf|54OH4`%Hi^QWmqUYf$a34S97k&_b`0<!&dt8Fx6Ntzcq6
zQO!qTYFL1y6?0_)jO62r+YC{Hq<Osh+2)nWk;7~k8R4j*2;)4_CJvt<lTsun9aYgs
zKQ2dO1O;$F;MB<;KpRLjK#%y5&M1Y9$<V<R#%C*Fq5`BAAMH;NhB*Tid{(U>$rz&J
zu|R<K)ElBl1gHB^T4GZc0ulM8_7xChcUFeXh5GzdQlJBA9Z<U(cx4_~6Gk}NwGoJe
z$8}<eM?*B%h<F^+SdJIS6gJHA00`!rb7;}Y?VjkhQjL+uDYA(q`IF5KY-FON`6}dz
zmnwOoZX+8y=B|WWl&g#%x`%8k&m4=%92!X)u)3~H-lt$L>{=;sLktS>k5XxpY1*8T
zF~Vo!Yu>LTF)2~<&3X6gK9Vjhjmj_0DwXkPqtNO<gFK&6%P7x6f{bo&>WaT4M~-qR
z7$#&pz|A{5C5@zJS=KfFMAxT!T5Iht%h`fy0AbDzVZASNX0ro2e4C?Qm!RBRq(xbG
z6Ufa`ktsT58M4YeE(t9~&BTUBc?%CD)|b+nwdR?AkM2I);DTzh)^$w2zz5Ym-Q28z
z1pwx%$&M~HMObo5GEUE+Y_0{xoO6%$C^WnGHZfE_i@yENgA*RtioA`%KbrINjXFJ4
zCAlU{xW)}V4mhaPbDD5IDF-G7lmXpFna?#Uk>-j3(lRP3aCkrOQb@r@MGY%%QbC~D
zQR08BF~<hBrrki@{IOVxJ+lJ`UB<Q_t3;i77k8S=-1{pY!GgqBt@P*q!n2L2{{X1W
zMx=IKF$0X$f4EJbH0GVEBmU||9E|Z%V`5>)CbqLMKq??3a2vUxCRqKi<_j8qy~JYV
zthp<U3W4w^C08TGR?tfeBJX3hSOO}d@g1F%wkqn;U7#+i0s5+)RDY8oVB?yuYk`qd
zc5nirq>g8|bqd|ok}?$NZWQ}9gvzDH;!S1B>O7X?S0sBk=`5}+N4iM|s@ZDri-|ys
zf;+5V>H8!$$e{2?6|MGfBc1M5vl;Mf<i4-Ys!bn5^y^rVETyeEm!amRi&9Af;QVSy
z5AULArBDf|S4m4%U8J>QsI~I!*SQ)iR!QwPg~1|%_O9(q6Y@dLNoZt>AmvWcOBS&V
zq{kq@>zc~q`J_BsT@SaqX`}f!A63%d$7(rjk>FID?LvHUlS8GYx;dV4yFLvnQSutH
zRlfbwXOImaWdao7U{I+IyO3kTkD*U{$nB<BHvn6zMy=FB`a4{>{hgZtM%;30lS|a1
zmA12hKIp$tSX^pxh|$WYoK=7QDJ<him%qBvjxgD6@=Hx^G>8LQlC7R93mdr63K8=L
zyG@4{+czFXSIccAci?W|nx#i|k~>wV5py1HKXHOiC}F0>eK3+p$H${q#{xWU@k7S4
zE?G_%jAbbF!MeqiG)e5K!hwpSi$;zuQ}=3El5%S9E2w3c+i{X8pplcs80C~Mg5u-L
zA$zeLWbDob0d1<=-O9_1V~RF=V<*K=9%ygc*+It8tbEePZ95qxPA%=v`Gz-8)5__@
zS>01bW*qKW4&SO-V_Rh;^ZBD_S4owmUM!yMqj>tOt2vSF`r2qa3&n8RUDXBE)Smc_
z6AF7Y%{ef?qD$+6xctRUX*Iv?%g6yWS#@S4yhm3%M`DQ72ul}4P!8TPQ)GBn6!@eS
zWvIn8#syv~Yv6&@j8#9Spp-AUD9Nd{$)}1;L^GOa1;1=Warp(^BTBna<YTI>65B-H
z#>_vOvC&<c9n${*ELCo$3};aHdgh-Wn8>ZFbQCNi1WkdO8sb?Hkc6IU8;gm{s>c5S
zA2e`iBemRlY;#fCs||T0H)@u{dS!=nIK|&T#cCL?qMlY?1H9AckrF+zf8w1-@JGO|
zAcT8r*v&)im5@o|h5fwp%kIbs1FAMAA#mX0oyTvIBqim-6#h6gHLPy3d!&478~MM?
z5f?eB=AD+^VhpOpQ${X<`+5^o9^edzHB~-Z6S!oGxPsB6w=T?019b4Eq>bjT#QCIE
zHC!26$-+sGIjL<*xyu>~U>a6-`=)#5KC7#;)gmqp7MxI)1FB{*GjYWWq^zv|;W$Dl
ztH~BLEDwsyB~zlxv<oz!<?o6*8-|J;gT+xpat!6A;1k7N+eEU)31POIEH<u4nz9%#
z+Tod93V?f~pqd0z?XWCrGwc~Xl~w-$YJIJQcJdi>xE}_ome1&WX%R~`;C;%ZEm&C3
z1c~z)Z#ki+y<dDp$~OKfw-;7$jm&n^xb9SuI#$np+%xhS6%(cjWdwdJ=bb<Gw^z51
zWzDIPhi4Q^{=ZEY1e$yQ0L6V4T1>0K^Oc%EXS@7Z`OQ8FOl^WrE5MKI>?Gvr?hQ>J
z>&>i({{XO%)qkhRybbQb`91dLBPxbk69qCHkZZ=RKVGb(Y(0#dXxII8(I*-Gi-S?%
z$@E2%KWDDS6_bO_Hb~AgJFh0R{cF)*gKFZfApKy_*bj0B0;0i@{{Rv^k^4Oh{{Y66
z5C<bQf&T#WtLfPf0|vN$sCs$0_W-4MX1oli7v$P%b{5)&vqNzjvJvLI8}&=l66voj
z&_+xLk%85EbRVfcoOH>%KtH;RSM`^p^*a}~p6(V7G0k6tCR`5E;7P_aRL_>ZvW`@@
zXBe#?q<t?ww|1nV5UKew6nCgJ-3s5-L8H4zwPxMHt;Uz??@MWRi*Ih|xE%g#c70s7
zd>1Uuivn`_J`Fa5quOaUi3Aaa0CI8NSpo;dRz<&1bS7MT%0J$kzo<T+9Dd<{=D4pO
zNjqrisX{7~HdzCc-8g*MXR5{z)DKUS^SVrbbV7ch`gCD`a+v$g1&V(}-ygGKgNzQU
zSOCTiWEa=ooyLE5nEvTg^#jwdkS@`G-lw%cpzn{_uiSYR9`Ta3mVaLQbYKhE^ZnCX
z*M6ODKYJKI-4AYmMBg8>e^zLb-$Z7FZJBulP(GN~UJw5OMru4d@Qi%Zr1j6Iy8ZNc
zKboNR9=Yke#1mWJ#FDQ$28?4R!$fk6@_HMtP!rC2G$RrT@@pOGkJKF`Mf*$pq&s+5
z&1fR^7K3tkMW{vp05m0zQScbfFUe;a`0g~|aKmZhtAFbHSqB&Kf6YD}M^DB@<YfN<
zVv?Bli-=@$K~ieU+Ca<6=Cii?ov!}?jnsehM#Ft>MpktH0FgqdmR?5Tcs$eO;{|%E
z*smaQk99NsiY6<R9D9mK_fR1~tB&w$L|_PC6f(N!aAhO=6)2Gya7KT+0V{o5pxNu1
zd&KAjnE19Uz;*os>qgaD8PJ3W;(qJtA8@X6NBq`*)*2PQxnRo5qEIreTeNz&hE)18
z>NM*j<Fmo!x`%GxZw$U@yJ-L}LD9aY)V;e)MJ7z+flbtOOT^WVnj<1Y*bVnyr|1tz
zS?JcQG|Z21+>%J*y4sy|F~Z~I!HZ2PPMJTTJtY>I3f$cQ_XKC36}VH8!0LsJrf?)&
ze`1MQ5pcVg{%g);$t<$nBe4iZ3ZhK7;~mio++do4HjZ~L59Cyy<ZI6L{{Wh;48m09
zc@)w?{;B~mWBivN*sf^<HUlm{u|$M7S!{vTX0@mQCoA1*91h7J^Hlo0LPR7H$L50y
zF%Ht&PrW~mc@?G5Z&F~arw7QYYxsaCkbmBTvzkjwp(JPH<YtN_x#adu3S7o8YHw4+
z?&+pgT(Wan&r52tNgA+m`RA(F#d&D){Nzj?!KW$mXEYn5a~8&2ZSZMsU?%VfI9~#z
zhFFHLD#MMXxWyl;AV`*9BAdmI$*s{r81YuH9Iq9gwbU(%ALUSL_QF(=K}H6W%ITn@
zIHr((>GBmGX}g#1p$Mh<R#VnGCBCg?Br_l<92|98nOmOdJEabWnKqrFT0T|vu7h!-
zUp#W5_Ve*-%^3o)81BBC^`ED0^=k<sl>vMmt&cVHr>XRNjT=&UE>!FX;&I(~v|7(`
zZI@>^QG;hIqO#xyUz79oMCrE8Gb$?NebU(3i^d8R=kB%IR-YZTK|ts3t<`GQ6-%&S
z)4Yz~MRdA&GDQvtKgx*IJvh4ox07y1H8iceQ3?*-y_!0Al>i~Q6q;t`jW%<2(%YG(
z(H51U+CypW$TK5(tnaMtTEYfYlQ`<H;?=@VH=adjJyCNWqpC)VNy+3@vRA>5IciE@
zMP`9(F2Orwv9Ck)`tN`Yaw@Y&utbGHUln<xC$V-{7zf2&jw&mIldaS|(ruUaXc0jv
zN345};5aoEvK9c<m6S_^yR+3?a%9}N8yE@#?W3Vhu#?_uIMn|D61^I+wv3)J#TF!a
zbPW_|Amf@g2txduiXuC32I4BO)pt@^X;MuP`7czCd>7)=+>&OuLD8ht0Fm&Kc>LD3
z+d#C9K=#2k^Pi->b?Rr;5dP&-2;Un|n)-{=(~I3I3x^|fpeC|=jh@Q}j~*zcMK%8b
zr|qST>|L#nD+{UFwWZrbFJSjxxpGtvF~xbOs^>}dhE>KF%{(OAFnXq+AGf->DmDe{
znvKxs^F%oWj1C1be=n-gndc=H&^tzONv5J8Ib+2V_#?U=Mj-v-qU1)DU4z^!Z^D8{
ziVEi?j^WK%TwE5$6apwK$ik>{+0S)9v1IP{fXV{KSe&1#q1Iyep^+JPDF=$Ty|{%=
zH;?(M%ZrHE956VlW|7m<`<!riN2)Eoi^-$gj1p@;rBq|?y({$tNtWvErnQeb9OAK7
zUY@_3P)KBf)k|{5rsUD;vCiU~B=pN$wxJNXY%Xinv}-V1c$44;LHt%?Peofmp@ZST
z2Q{J4+Dj`yC4IngNZpz9NlT*PVh<?!?OGF4ziT-WTbT2W)cCKVGJ~87iu74r0<s<1
z98yVIE5{_I$?RT$k~PrcF-#;@=Y!R1VSJkK57WJV`$E*sq|DD`&GFBQ_Z=$kE4Y?e
zoHCK(xH<9466EywGDdM*OSo8(Q#Vxv11dh~h3lHm(Wx;|k~yR<X<q?yDkI45mGCkg
zTRa+qb=ri3QYr9hvw$Kr7bJWh9r~nE_3Sa=Rxa^^$O40k)qbfk9;I=dk0P?J;%JyQ
zIO@HIfq$_RiKzYT%im6`cPPgs@k38AXz)`f{{X_5QDGhXJ1$7^LOhJbkUOo6&N?{5
zseE!NjARTlIH#*i5#@Un)!xmP<NKv#MaGc9qHX^GTvbl4yCgjvnzPa4aIDS3{nc)-
z7UyOSxqlRr=wwq}BC3|-C&fV=p46zod8aZklP4s7R5wlr2o;t})fOjA^sh>4_%JGR
z{0vvIBD?6JiR7+p!T$hEWrkbT5)8I$-r~E49XVrR{D&3uFY-)L$C)Uj+j>SvJZafY
z1;AfD994u92}mU7p=)bTZJAhZJk>n6^4d-9By4hVn&7DKUlX?Cn>8}rnIjNJ2j+)+
zSYiOmgP%2BZFJ$+D0dpXc+Knq9k42iMZwU?qpecZklY-8Xp7Hj<Jg{RzI`~u4#=hX
ze|0sCGb9;vk}9by@;_Xli%WlO1}&N%?#*s5l2iVg7Av{ty*BC&(dMpgTMQ86{pvl?
zc1Ozmj*sFn!vd@IFpj{hba|_5yevnN)nTqRhFwwYk~U&|(Zva5d<GF!X2VWpF#tKH
zY8J=$NhsiXs*Mur;9PBS@%pQsQgGJ!Q-Uf@I58An;FnOa7C%)?Cze4XqVOtfOW`;X
zvB}0Ml1A3<N_578e`MRW(V0`I$E}qpL~MJl1@p}XgUGITD9s*_#ONJ>-S<&k#EgYc
zAn{huB9mNn9nJ8-y)&r2^{FnnVfRMQY9)NJAM;UKSpME7_OPJ+Rc^aA<-9wsw=|zr
zyHw}!;5M(a(m^CBL}xgtZQ4tcR&^i>8%3V#TX6SiRlMepYq&xkm@o7yI7%*wV`P%n
z(Rct?DlOV>ObQlTXShD>%mDDUEEcoEJ;iT_)kc+f$SD$ty^)6G{nU4GtdWwZB#x=q
zH_?d6ag29W)*7sGT_0?30)mX1w1$FrWzK~WiZUcz9~8}2E=kArLEdX3JDAX>38Q7W
z6T%A&aYmeZVof(IEA1VlYz&c1zbe^nxHNUBj|6d=r}Z7#iG8?NE=^mFZ<hi|J0r6|
z?oWKtGod4II5h^Fx*KnG$0TIbmU@JDVZtt1O<5?)+YFj-gQK&8HSgrz)QP7U&hT<5
zM;8)JouxoEEcbUvp+VxO%j_+96H&Vm1w-HBoh+c)<yR)16|63v^v8448c5052>c3`
zfYqu%P^Y-0<L5%^HmxfwmMT`Pgl`0!@x=*x{_Fi!(UaY1u8>xpDSJ~Bw&Dd<-Nu7@
zH<~L?KelFR4+e+S{$w(J(?f{@aq@~Ay*+dBJXLhoicZ1euJs~T9l-Nc>oDDu-E{Fv
zO<1WfCBB;<{ifXV1u|Q3@swjt((YAqjKGuCPay%zRSmT!TSaWPP|7xA80xIF>s`sW
zc>qvCXw1i+MH-UqT&*@qNvbB*d=ovIM%&T8Uv(0_$dh5qFWof^&GW&>)jcnxSN7R(
z)m2OT2{?`fJEQ|A$)ul5x4lp?{`CdTy!VU_?TJ#mO=So8{#fFLKM#^K0Bs>)%-8@`
zMze1o%&LMv6?t}>Ajuis{{V`vx`h7N?K$GU*(mUQp3KCS=ssi&j{>?kj0^<9`>EuM
zVzV=O>Y+`A!0AtBs7MwxZD!<VpAyCxo`|NYNat;hxO6G99gWW<)S_83I;^V2emSi)
zCf-|he{5p1(YlfI;}v_RN*>o}JPxYJB3N5Qo3gQ}#XQ3g#F&5|bkN&|N#>=JKw<&9
z6>iiU7$JZbRf$TPgjmMWF$xAcqvK!kWfweiPyp?nryq)6Z4%i<e`@I9xCMTwV+<$x
zE(tV#sALn3$2sDIXh~LPZbwy1icFSs*~UR&yc&i(7#|z32{kpX#u1gye-#TBV9(H@
zwF21L^!s&~C^=ue)r?v^CN#hYG!?VN6#IwztKB9<F)1hgso@0KErPy*4$OniMF*$S
zOzk-$v==U=0)e#DR*Vs{rv{ngi7k|JXdpj16v=cr(ZD3~D^GBa$YdY!LMs^;4!n-2
zdrRm$nUcTJ3NR|>s^qe5#dnd9&1`j0=^inW_f!+8+z(Yt#TyRhRhuTp15&IPu^;iw
z>JQPYc%a-z=7?Kz+n0g1mEnsf_ELZPO-4oJpYmwCF{?HY@239%nkrc1MaVgCB8jpp
z2q%F-V299s$$gGnU0?vw-_6hFrL#{^vte)NAMaOMdnBsADk@Uza6O<fKC0i-WBL!f
zKeD1V^&+tcQJ4|W@~1_2>SN046Y>85%CFS4jtO3IM8gU!H~<DTk4cI68D^i^M-HRv
zv`#PO{{VKOru9Fn?bwC9kdglY%Cw>%aj+LWc&gn}z(T={Y(sa&S3Mpdp_Y7~Wmo?I
zmmaQBkvklIXuB`eFH{vNBNP7g6!0<@_Tgfu{{W42F@=W)6TkYZ{{TgY=s&2)yo;;)
zk6OLN&O~yKoqM2@T=g~w{{W<>0RI5;sphnl#$?)XJXA;r8-^;yq{Nj_uRccmEv;@n
zQug9R)TSi=0P?A!x%GPxqfwVX{<TZ#+maN;!N*l*8N|DQ!1GP|JTLf>XUX<fPkrkc
zj?emxryu;Nchvo7jxS~-`>hH`?ws%7(U8LNLfH!ZaYOW2YxalV$@XHU*S%*RK)suf
z%?jSF^^|YT<e>imb+PtRKuJIvt93gBcK`r0-7K^?zKQybpJsAjT>8CuYq>}tMuxcc
z@2Zam=1hNeVRdMcvqk_lO>+(lD@VuPDyO5wya)9eKFD9zo}z5-xqtblC#-eyaqmC;
z)H7Se4n;9y0PX-%{+l1sJ^B5fvHD%=$)<&&xty%OlihlCrm-f`x>85?UpZ-8;*nku
zLzCjYhtV2NyKs@oc8A$L8s}>?iZ60z{VsjWYdtU1it<cmCpkH-P2(iCn^<#IUY@e{
zZN~5PiWAgY>~?M;jBbA-x$V+6)s<SzOKo-xWPoVRJ5Zhw3P^vdqq@7E{$R|w6c?*C
zS#*6B=7J+5;~aRUjCVRh%1Hux`}L~l)0&^`<Ao$6y3eow08zIDjn-@b02SlDr2W>X
zsY!cq#Fsv3J8d{Vcp0wNFHVb!-Py~@tCsFMJ=H(@i&Osqsb)Xoq#yl5S~v2w<NB`*
zeJcyNie!30U|WDG{{Yh<C+g;V%l^FSQ>V(^f&Qx>>rd1@1Jp3=XbQ*=4T{6r=w9Sx
zFawN=4YY+0;G7ja)5p`{7McUp$*N|G+toUClwd4mxAj(<cdMhkZ*C}<XU;1nWu>H@
zra<h~?v-y6N+N|o#V$=IHtqHg)=m+mUfcfwmgjNFB<B^NjTNofN~hvGs%tXV?YKvi
zSBrZJX;L(}48x;ZVz$PPCQ18}Y|3k<*;MUpAG)j2ZG@M8=*#5rXdhPU2xV9yKPGN)
zYX1Pz@$7{#JX1%rE~DX(`!!l*JA&jIw~`3q3o7#7XiYyQ6>*BWNDIPOZffI+DN_%!
zWoQ+blUA0C$YKs^r$%*5!ovhsiqQ@-1wHU)aeIOywZd*<K3suB+eBDk4r(|fZ{$rF
zkL}>7=glK*zA;^$g17|GdexfWSxE~I<S|A$$<I{Ja0AT>8+O;hj`X+b-LceGTbss?
zHGRZpz2e7F(=`1)I1=GX#-T<HdAG&E{{Z<KLa3>OpZThuBd?3q<rq^|Yjy8c300x8
z2c9=pQJAxra!Ve~Mh-^Rq-HAEHDn;7b9H*z>L1IZaz;3&u>@wA2rHUF@&}rFGT5!k
zZXfYc6Y<X(t_QJEk^woQROCZ2;F_V<C%NTf$*mCzK_-NnIP=Pk(G^<c^Ic?L+bNZU
z5P7QmJL{<i{kUJFTmJx2TN$A$iZlII7gCCQLNgIieB!fY$6TYkqSiS*l46P8cljt3
z;VOHf3}CS96%wnge&OXvpsb_1w7Y4gY@?j6d{tXc#`8fHu$>r6aNYr+=d~-91Ymfh
zorJci(Vfzf%?Gwc$dKUEl{aA1S^%m+!ShbH5Tjwa&zdR-npXgWk2HVnZ5c2C$f&@t
zup4`HX5q#&TOUO0t)<-i6Ua;?<DV6sHkRl;$lHY(HCu6ZuMNQsuqvDy&6^U)shhOf
zUN)LOj?=XZIigt->LL&S04hChE!(N%Khdv+{{Z}_^&A&Z9loh5AY*}(R(9U7^<t5Y
zUO+#(!~UI>^mf*=2ilD+U3;p3-k8p#kigf-)_$$^drGP`X$U@ZTD>1x^=;0@O<l=v
zx?l9H%yq1R^k#ERxRd-OSI?Ior1fo`$$|9-+&}YAi`5>fXFl!oU<!-UbFM=5kURAt
znLSl1_^j*0?c}Ru;)uAK?*8qrXMOH_CZNB4iGaQ<(rK~0c+^?U$t}`t87t&=y>dq*
z?uSJMfNUtuO?PoG?%0*e=7+g$_-&hi6>BPCmkSAtD#vpD(61v;5ZJ0Vf99ASp9{zx
zQ3-A%k0jy0{ApStW2VezvoMs~f#R#?w_wZw<LJ@4M8wY9S8&HPcB=OO0JjAf5>m^d
zk&3=nSvPIQDyr)*DE|QPtG!gnUm$K3PdY0Qz~Zv8(W~?%@L0^Q#AKTH4H2W%En~?9
zV!mgA<c8%6vF^R!^tR#Vw?>hVT~~<z04LFWed|3xqH0y_qfdVX5I9w1ntV3UOu?N-
zML*O~7Yig`%ByYXc=(i&ULH3n@Xu8w+OokdS+4ge8)%E)ei%uFg8l_X9o%aULjr%g
zmd$<XH<zlVCw9h+c;v6LZDrqe!YBf*B73QawiJ)EOncvI;KbvP>W+r?6iyg!@G44I
z&e5A-*3rcT-%L+6X|gv$(RYsMEuu*bjgD$Xy^19EV<5lxRm+}5=J*1FTeEub%rd8{
zd+1i}a&C%+lO8CzB55S|3Ff7_X(1V7g~cCnuDA_b;gwc!`*%;D29CM5{mOiD2&(@8
za7Q_qK2(~yu(*y?%3z#Ow{}U|DebKq8Ov?vj-5%1GH@zMW>mY{!@7%6W|BNFLW`c}
zN^RRCo@I&+!G{8b)34&5Mh@yoJ!YV~wUYM5(Hx_6Dznmh9o>!0l3lrRyCXGX#TN$L
zxh;&;Z5r3OgbkSkJ}NI}BbPk=R&Ujk-$!{2mN1gJ`Bl!J?#pPcGi_oy6(x^s6tA+R
zypu;XZy`p&0+rylQ@Ipp?xHrRq_A*J18^<`zv6O7)mE3}dQYNFLQ*!A$!zsge(H9$
z*i-|GbWv<g>@wJ)n(ZEMYiI_VJn(|~;S9HkJaT^H2R#a1GS(Y1#UdP>^+m;J#{A~G
zMhK)4O~1g8x-*`khHH&Z8Dl#_pB1K#GTM5mA275NTl?%BbAwj#Nf?MGMnI!Hu1PPT
z%A*xU6|9p-a#Bya?Lq-5-;QX@nANA<)hwF?WVig(>Dm#_%INX!>|_4a+enhyR(R7H
z<kWFW8ua0EqO2|$dqkia`W0hsIwM>Ry1k4;ouC>X-r@;Vw7d$6I9hki%2h{-e2~E-
zdr82el1Paot|SutExDm%vy@4eKRPxWf7+=lLfgr3_hpDXe(96r;za(5E+mpDu_rjH
z8LpaX$Uu0jtx+U!4tV;gAhw5YNCB}{;&DpZTWqa$p6CSKw0SgJ!fow9a$ECIT>k)x
z@D4evP@-)(ZtoQGmC-p!&uSqa4(l(d28@z3_^o8|rt=XMN2(&D;0owzrA~_4vK<u}
z^CJ_uRg+46x5&w$^u_{04-|5$nK&F8IeC?`)ag-6A(kb1f|7o!+e(;AZX|VXJXcS(
zFfqZRZCzpm2nw2$nsMj~GP@d=rKb^uA%4vp?b;R&c9y6f;?>UF0Dh=<wP%<*IaMaJ
z(tm==3cbBB$B2}%{8W}#0pbQ_BR_RKOJ@u~xf%S^onn?epagy@r&Z8z!RFT`pUyu9
znGz6(aPd-1`(GO}PiB@(m~y|>ea?@P8BIkNJD1K#Gy{@UfxsPAp1BqQB;eI-VTu3%
z&)KZaQz6lXG5s|$;%51SoKv?L$ruFF1C?!~rl0|11=}nL6>FnAD=-7$Rc^8e<nc#X
z$g#|%u><O!4w(z+!ho&MG(@NhfDS>XNA1fG!@81W*%}Z|?;@`t`)PIq7IFEdLGErO
z;E$>lZDu{@x+3VsvP#k9fO@PS0{oLzaBKyHubQ^h8KjMmBOWS^*<E>DFygbxazTaS
zSqK2&(ktYhs!Gx{V{kw7PDuhj!PP235rU(SIq_Dy6uFSc(EQa~jm@|UFHJt&{Nxi&
z2gn(!DusDqc&5X#75M(DHXtGe9GW45A`XegTtV>2vO+_AO)?UM5CAQj<cu^q2a`>X
zFJZ%E=QKzSs;Br{aO$$m;z@8<7&WxGDLWNCp6e&7LSeaJqa4*!K^q|U<M#&##R#*^
z9Fp95sfYPvA)JhOru!fPlh4&wL#A|-1wqf6A#GjY4j-=+JZcc1H0GU^cI1v~5W3R?
z2$@?Tj*VN`J2IBsK0khIH)D3@WrStRconzMeKmD;BFQj#;yqNJXu>xpJRs!h2^?}<
z$bkj{?AEtL^triF?-EDiue5JZ+r_4n?E+WH9M+RUvl|y`;8!y?vQl*(5w9K;yDIeG
zNm#>z(UN~PXnixKxWn6q{{U*!+S|kc3=V%ZazPV+6&W4YH<E4g%TkK)V?X?n(g?w}
zPyYahhPd=LktgxC0R9Df3c(2&+e+5RKZv8cUtpwpMx*FGJi^f|dqJ$msrqjTi)>A|
z>b=JHD3y6kpX#e#D{c625BE`L$sU;N99rn`xIa&|D_I1P$-EB&j2@oU6&+`{ZoaG9
z(i>3NjIMsFbEhPWl8V{<R)}kIF8L{z2>3pCSX$O!ZfIN#)1^|gC=A<AiuX(JOxWB4
zUfDDKR%cQ4O}vtqKv(OE(+^OOQbli0{2p(03LsWi&S(qN-xmj(+G`$^yNmw-MKT3u
zuk0>hlVifbed4vlG%_w3RI$oEqZ@LJvl18|6dlZ}Vlk8ij?G+Zu`<ZwHBfx!sHa9T
zvbPQ5gDaw{?T{|iS{4YNNmT*M6TqqNLCy_D0Wr!Y4T3tVn8&L6E74NTXqT6Zv~W~c
zv*<Hh!>7j!FdGId#JvythHc?X9YAI^;aa^C%hXy#$QdUb{%eP-j%iDjb+peMW5Ycd
zY8J_8cE~e|#NO?nQE8u%o+)C~^!Ho|$NbROx}KSAH$3Wo#%m|DHu^OCk?O;cE<QY0
zPYiRyW1SVFJ?68sdc#dXL?<|`Z>#-veDE|@p9Sk8v}4g`ljx#(@yE7Oey3@+wz|0*
zem0ZH6_~VE0f|G!beGpREM9pT-1#)H%^_iq2b%O4G0G50vyqlcQf&ngWQlih6i|$%
z<pD_mc%nq=t`v`n?xVbx7>RWAS4gb{!E(|k^2Xjcrb+~v!NK!F>C=0%lmHCT&k>6`
z!Obs%4fnm9aSC9u`mF|mYLS#J{Fr2uRC;x{xpoOcaa!A0Olj^AF^+|0Ym**XwP>+{
zgm0ap&<e+T?_U=1yl6tP;0n}V-$7-iJb?U&^Ij+FZ9(-5=aGrPJW&%2H^}WWc`o%u
zd04T{datKhb}AFic@Bu-Z_}SO>t2_^#(OkkQR-3ZjlP_u<DRQWW=M<?_0?wVjtE?u
zzO*HPAn+>Wk2yEBg!@~VG)hC&&Yy76ph-p>^Huh&RE!QyTO>i$GmdGbQweVV!(x?4
z{#c?F)Tqg-#<M4tbDyeh)HA^WZRCMNg2?U;y1fhVkK||~ZX`J;0-@98LZzJLiK``L
zDfyz66y%eakTiMX1n%HbQBEaFg5#AS8WP@hhC-}+sOz$*<gN=<Cp5BScYM@p%1|&o
z8tFOY9MJ5#=?@L{)gx!Rj2e7-k@kupT#E@zF=NQ;m_RL%Gf~V6zTY(HWmE@tcT|65
zi&wRckN~9h^eAu(HsexUN^_Db6`U?rhC!*#6XUvwu24QnAQ4oW)KFYmG8n#C?yhdk
zWeX`LsjmwK=c-rAFN1NH2b{@v_BZWqai4EIRq9*Y-q`T}04#iYtmm%ECC>i<x~uf>
zs71BNx0*w+o(VNh-YoRkBPn81HMX|*ut~Ms0eYz>TgRMj85B%9jkUyK3C`}Cx=XaR
z#@7v+PKMiLvfMR|s1BgW!iTrG!jrq^XbJA*yt%;#V5<vwH>Tjf#-Lx)3WiPg(niCK
za%(5*d*svfxqZLn$GX3`zDQzdfnInfyyMpzlFur^83hM;pq0qchCgLIE&GRO;2I{)
z2J@LZHC1l=4<xVG=80Co!sjIOT4HN7mQyl+>9-{JTa6l7#0%}W$axj!_Og<yi{rZA
z`g%f^NwtY1?yiUvEhlFG0M(tc!|tJ37DQ$(kHt3OWx1P%01pO+xg-y5Mi1RtLj_p-
zL=wR6%`Nm%GPwgQw?FSu>7|`WToOB}FPWl%s;D{dH5ws_niqK;j|vS7G+ASV)g5_o
z&2+$kpmqfe<;sv_!uT|zkwD;(j=bl(ZR5C?Zg?G3_N5(Lf5fkvWa?Q%kT5zm2p6E4
zBe}p{4|%Iw;S0jw46v%rCQti-gP*fjckzUbvNm(wEm{gOx|^|gMd3|WDj@|)&MM<n
zgqC8T?^NO)yO=jMmTH*jWty~?$r<Z##e1ir5p7a1t)4lr3bl}=!P~HVuSWDPxbaT0
z$V+E{Yt8=vkZJN~!SV2Ry&p6&<f(Z*c-%<kn8rvHE#QjwBB;Ax?yPMUpG@~gIb79U
zv#61VK<2({6;1t~**jK%qSG%e<9Q`=$>OdwL}0%s=86-m!R)XFPnwk=wt~>Iu`S}4
zE>|T?DYiY*+jeFvx}!ExJmOf}0;;C6lHy-+r(_x9nm*FrMJj?~%jDH=G`lQc5_^c`
zBif2VpC+KU(hAGHWhMOcR)4jQI7wIeA2c0|R|epM$8LM1-qNHrY4c&LD%t_>bUQRI
zy>`uH(m)I3af)ZDEG4_N+LDzc{{S@o9@NKZkxX|GJkUxiJDdLiz_(nK+GVxuwrwQ*
zXQ5WQ5fPR_ki+h*ZWC?0dtd=T>52=N{{U8brj++Cg5=t^N2^=jO>iC))f1<Q8a<P8
z)Lr7rk#9lRq3oCiEXU+&YSjVH;te!SZ7$);lhqd!??6rkHtB#}(qglE=CUlA4p=UH
zQI081((EvMyEIcoU4SXYLv)KAN9B*jMW^bGb!@JxcH_KLTH@Z}@Fh}n%I2oYF3}ox
zRZQ2i&KBEt(cKoZO>Uc|Oqw=VNMj~3gZQZJS*}4c4nH)~nrf?p*xKct6t8z4SK<BC
zU-ab+TsB)ZEc;`OsvLa*L)fj_7H1@>JWyBK;1|WBcDj-d7<@$qd#LYqh#?Hz3U%Ca
zNjPR>w0NRLvO1#`86IhpjCo?(e2xDAHNB~9^fQXC(ym_eQe%EW<W-f@9gG>c#S5d_
zq!P2lIb-6QGK!0}$kf`fo4Jw|+abe!(^hq!4XMb*Nvhckl?C!KQP^9oQ4~;VRc*f_
zEwlw~Xzwc!a4LAe!zNE4eba8Y!7s$&O)PP}!w+oad8;Jb<V9?nYk@l95HH;bViEka
z@-$4)v`LR5t3uq#a0ovDXu)4ZK`7HSfnSuS>UWXB79(Gfq`aDBA!AI`DQ>%rvz(LS
zoh5d`(CZ6?xnJ{qPmI#uLLywL$TavZqh&H=VypFO$R6Bnll4Y1-7X0xfwru(;m=e`
zUMj!sSCRKsj|}EA>PTZp=&>&U0P>8#-D;Fljg{`c0q>U*d<S)$)&WVkF~F@(r<ms;
z4l6IN!uJZ}k($$`?O4CjUk$@FgW4JAoK?h9vZy3;RT@kmwvYmIR-hJ;=c8J(rF4c(
z`5N=9NH-BiD{o(h4;58?Y|6P)mOW7#bQei)lsRvsRZ0=tX@yONEs7b0rM`bu?|$T5
z2z|4+?xM0=)t`OB;13mJ`?x_Hh6t);+I0CWO8G=HSo_QjlH(o)O$>8e@VP5P%=61A
zFsa6Bd8U>qa=?>F_ccLux-iSRh<D*lOyOgQoaUe}C$uoR0;74Pk#_CCuedYhg*Li^
zQM;^mYMxLP08_fJuj6Kklz+WbM{Y8A91-HP&2md2Xplp{c3AkRp%Jry+!57HE;F=t
zDhCCeFkBiy6I{erQHIEBd>BbpUZWw{FT|1eL<K<}{N;y=oe;aTRwx+`D9Hq}r_1>P
zRoYzQOME7G3bR1PH@kSPE0Q7@wxXTq2BAl0eX*WCscw)fXOazdnh2ZRCxb+Sazu-a
zoK|{v_U2%C=Cxkd_9Std&g!r#-ea+vuOR^yxnkaWr3tWmg>ouqz?E7=%tPv%C(IVk
z6Tqs7L?zrnQGri+$zjhU%{DbuKOx<8s*#_N;Pp_3k58COZl#;XD5b{XgOT@CS}cG$
zM{JL(x<z65O1VnW31!_Ajg63Kr&e&RJH-l_m52&XI-`@}4?W_TAdeiI`sDU%vtNL1
z4yPnl%X^$0aps}aErj+=%ibwx${~wfm%#5eC~ukEIUQ8%3c(@gE%ia(N;i!0_f$7T
zJ2;T;>Yrx>+J)lVK!u%-1}eVpbp>{u)~EEI^4&d8AHTN;tkpB0(D6sAex81tMQ$Ry
zy-_5moF5hGmpVi+TOdfs6#YKtAKJyRfJhZ%smlu4$GH57u0B~gw`W5dQA&W)-)5B|
z3)hNgr{#1X<V~kM3bK$O^z@8d4tgf2^iO>p4dO$e6ta6pin}(pdt@NEqjn-N+tHvb
zB#fw8Mh+;L<UZ!mel$06$u@^Y8f?j4MKC5wIH#nCfI6brVTU*b)Alb#T#%!Z9)Oz0
z2avH|1zcT_Bfwvg>Vsz65fU#oT&u}sQ}9&7V{X?CEJv;?&c@x;5bcn9pzWA@Z2L-`
z)pKI;ylSM4)VS%5kun#D!q9iHZCr%OtKkx6p}MwGIXqReTooo{rnPww{+=iuLrAop
zJ-H%MPvW)GLl6Xp6f~B38QeMO(sH>&+I^ls>i+=K*zTfdvO8bA3h|#+=sL!gF%!Vy
zSEd8H`v};LxnYx3y2qyMwFuc}g<KKjR*ZU_k?)e3VvM>zEq(s!-lw0sa8b04nXh#9
zujwzi?Qy0Jr|P`FRrJ28rd%|0+qeV$SmL(h%Ng`$mN`VslwdL%uYEsrF;JV^BlAED
zX9a^PG;NxQSvK%V=Cnp7O`6+z<Pe7iix15*>5QN|GjrI@GS_fL3(BzOq@FfWgVkR^
zuxp5+!1kM*b5%N}yp3$n*d5iwqM+T?SKgz~3#iUN6&m0MXR=jI6KrD&Qbyno>LrVA
z^})$B*&;Cv?gXC{oe-DY2P&sOMueGMmJQIT%&i+_cTN)_jDk+%*`;iQwB{=|Ltxf|
z)T<xz4b9@R_QV^2RGj9udQz*M%s?WV1ZD1x3j5>Y0X@~TTQIl?jo)IbY-JG7xYf(M
z3rD=&fCXolXFDsoGY|Tgs0(!cq92!>SDDJ&vQ20`ZG6{yn-X(_S>NTF3!`obZGe{S
zQe}1UYt?-Ve2$3n^MPJgfQ|^Q$D|+p01n5B5h>O|N)~$NmnfD&js<A#hRhIhwTRGe
zM$n|-*2hnr1RJ>(d`~B+hS$k?YP<0Hz!hn1+t^3CsjcMW;&`hoGr0VFr!p=&Ey5Vc
z2fB2ziD6|Uu~0!F-SRmj?ub?-(lpHxXwQXPdD$O>;)zRvkL8*&^2IJPdT+X{{{W|4
zqZ~FxGG=_3+vs%nD}zjm(XK$0ouE_T(_FUsn<MIvz*zRL090u{3ge2C-Znpqh43lr
zMhGVYqZk#$fGSYQ(5y&2Qmm~XBf5=TWag2W0pD~$opOa|EZdDCa8G>HOvC^wj(Pml
zp=@;WXCb>CR3*YhS8hr3QA;KX$>xWiQhKRGQnzEJm5~lAi&45DwoWL^%c85cvi_jI
zjvIL;b^`*dlQ(JN@<}9P{cCc!H$K|8Rf)SsOOK9t^H#E4z3uZpatWbt!H#aSXr;mI
z@Z;G963a-}W`Z?|vnSW$wt7@j-XhN;AHdCdoDMzZAYkUbH`BLF@I)hU+zg7VBvDp)
z(zIXc_OP-MAP7OJFBE_2LoV8$IGRYaw+5pN37ZaZ!|tmy(=poRgT(~S(~@vadCk}b
z*p|x!=Djc1(=<A3%Do(pE6Su}H_V(GIL-L_7u+(q!u3SKC=zo*!IcF-<k7Y;q?zBx
z)m)1UC|E_dL{86jw9}qL1Z57;Ya18DM;RcB*y+ro)dDv{uYe0^0S-65NvUTLl_{Jz
zgG1XLz!Sp?lJQ8}A;4;|VYaZ#Zds7>IjFU1CwNICu~L~B$h(OI_e*}vEgdnt1G*$J
zq-|zq`96B5nVkG(86Q<7agrpHg1P7JxkN0PCk^?eVvG@?Y=z376tb*d-p3nC^<5&A
zizss0&2-a8JZeb!sOEr9^!ln@Z)V`0s@qdi+@oz;q|?+3kQ+VK{Dm4kAL0cNqcf>S
zsu-5|{nl!B8FHqzy22~K!3YakR*z#pITe;`k<cVz-F`j|Z$D0;S*9ELDtfF6A8JhJ
zjMl5tTBx?YLZ@gZxqhPw%LJ57nkhy(Q<3x~o!oF~f;SwIfkJ-CC039&(@^O0$EjLG
zHv`6Ky-WN6M2&()e3o(9k;V3X3k<FcX;ChwEJ;ZTAbG0`EmAqvPI&sL;?v=|Tuz|p
z&2`q6ux&QuAbnHGV``scUnRZBS?$6`Fvra@2rn(6E>N6#s35rZqhE95kGe1ZsEr@B
z$p_73X|5=;HqjNG%(KdU#w+-%2`}yJ<I9nnsghOSwgHzvR7`eoDObA7J9so4V!I1g
z!=bxN<@yz46cMZiiNVEJ+uB>kH^kV_ibT{RX^p8rDywa`M3edf)wL0AAdR2mYE2&Q
zCqEzMLtp7XySqtUzANII*2@jri1IP@R&9pf`7E_1NYl$9Hph<@S!SsrW@i2;sw4@2
zYn<_ps-6gLKYO_w;Kz|k6vR=BT%D?=%igOJL?gvE>qoZKqENs94P4yYv&Wys2^e@E
zk@2QkXBwa?d<hMw?Jd28#?UcYn=K|weQmB9Qe$JBR-vXzpKN*0nseHsrsw3-M>OMW
z>?v*Ki72*ccLF$|<C+pm#=y7YlJN_|(Z%PpRoZ@@*3uVQ31WPjk%Fslk}gX{v60){
zV~;qfukIcQ{y!o-RH5$V_z*z*rbv;-Gax06EmvbMT{E=#qMZl0xRdIpxn)k655+n-
zAy|FjKsc(o99+pDjkAH>6&=i3Rg$FXJl@e^iYeaS$wLqKstpRpXtfz7xMEj4RJ#1q
zM&sH>btjs)+D>~@A9sXl5_s-l+@~iLBxScS@la`Ci!mYs2^~~0-!j}XG7#DGRiy2t
zBgfAG-k`ZyV9N|*o9UamCWN{#6)v@F`-KKU!&7XcTQ~y3a4SW^SnJ4&;;5eH>@@z<
z3?DSv-BKV@gD^ARiJiDT)x43~q#4`^#VnL#R7J;NuumVfcdtKmIE}8@svH4Kn&JX>
zs{(V?O$Yc?xH!!ohHAMT`*Cj`#(bKrxwiiRFSrJ2Zr<kRZOBLhrb!$_iAP>)$fq7e
zX|QSJR8B<;9IUg3AI6)1aUSh}<mQp(R6sG7$3nEmF1isaVOQ=YP0LjGXc+>Y>e|}g
zVQkL-0L8hgtwBPgFg%KUFPcKhH25g)RNKlJ^_ns{H>#eqRacKB(%|G`i?xt|5csZy
z<MBg9h21#H<C8>L$s!Q`KB}fk@pHJ3h^tFe`<S;Q04ki2X%<FHrWW_A-^gO57ST+i
zLyCc6gf7<pMx1S~kdJM?HJ+D-GgOIguH&+Yj2d;7!d)OO#(Ww>YXd5o`D%3bZ|!E>
zVECnbD!h?Dvk!E~Xc6_~3J{jg-bQHHh-f!25A{%7M<5}y$*;JdBnG>MmIJ@fMza&X
zE(*z!`K?{cG_i;e3@bIOTbSnO#aEGe03wXJJ}8LPu?Q4r#YYk|HsCTTjM527-HcU4
z9(M;IwgUHxVUa!1SEEb>#_hSYx{*~`#?eNITj>)=8<ZWIx<vP4Boms>X+GNDD#^}k
zL+(hMmM)0uuY<UNh)TQmu-w9=oNpNkwM9c~pL^uEV^)rc)~%C=6y5>^()>aU40)<;
zWqy9)6>A)EoQHf7Q0j3j+u$zd1Byv2L^IGzLbAS3{?Sd5l~a??)lavbrFOE^O&_?x
zc_$S^Ajr2n4ab2}Fx}2F2UXV)`#>&p`J{$b7+*CA6K1id7?K|~q)+n1n@<E*ciE7q
zxK0$+)`2WrOfKB`H1KGF)`(J0SOR*fGUk1+$2?a>2>r%0$BH6oaUZyQ2CQ;OBbaYP
z89n)^VvXaCqdy>Nkh>WOCnAc1LAaB^`lM_$Wp!I-Tb;^)4<@N~OHx5A{`J4qEufqL
z7a6K++Y|?4N4b9LnJWz*c`7(;b3|$LNoR7CqY&zI#b|Hzoxrqd)lpAs$Vgy8%~2?Y
z9{2iR>qzwoVS?eZ>40-yov6(n$caOMM<bf~7eLfz(r;s)Hz9M#uW<B#s#8#Cq?GZ1
zJFI!Jc{O64@OrkPAv!O#P5|zO&~5gWcwCX@mOF^<><Xa=BCB+5MCtd&3xl-@;F`-c
z+u*(uq;4Si^5Z6pjzw$(@u2MIjzkWGGPjy3BL#sbsHVFtO)|m<&H$oeVh-OF2*6y9
z>G89I8#Q9!orTN^BaUjnQ;?U96@R)Yxn(Cj)MH>x?a37;n?#FAfe^_q$4)VkOwy&0
z>LfCdqOGHNrA^KTM-*kHyc@SFjGC=tcA6dJRMSK7Hsn-tr@I?-;*1flIO>@sSpFuo
z)tT(gz^V+ZD|CF(4zbLu6mN<u{{T)_ZNq}y(2-a=NEutM>oSa^#TRajxv++aA_Uw?
zsN{h}XB--wBQxanT_WYSk-@7u6N0*HJ?oAtmsIraj-eC9aRN4Y$rY<;vM@Up2|#`%
zb5S}2R(#L;k?3a8FAc;fe{9Dm1FFPN6aiP+8=E!tZ>@B=^&K`_c@Q#{JpEV9TGfyJ
zGgg`dz*ab}o{v8FvR#=o<E6O|(H)rkZZLdit|hf5JoXJx+g~D1n}7o~rD>PiM~wUk
z=DIwYim5EYI5|JvMR{oPC@8sHbBebK@33On6$RJ0jAK^FJ}8om72HfN7!En8p4nni
zBb<}sul1WI5{Ux$Rd*^Q1AjdAO3(@$utbFH;-QZLfpNjBOR%Tqw~sU=$_@l;RWO3Y
zN?c_}0M@rg3oKi|)N3e;V{?<qtv{w>l0q3yG2)k`CRtQ$(dp0!O_GMm=+PRq>kW~%
z4UvkMHxOBw#9tL1)_pZ7XT2r)=lIn}HCuLhzo-<AMpJ@0^Hq_lEyZd5aEjOFMT{tF
z&cdu1eN!W+joeV8HU_m`mE3=c!5!5KMhkQGR~jY4Lnr`Zg$8SKrf+oV86-(da1SE4
zIz`G%id6Albz;Qyat&W-mv3(4FY(PZB~0`B-8yT8dM&$rus9WGX3U(5qSNl8nl)uT
zRm3tb+y}s`uY=D@6(Zu<iTN?3BPWC6sq7^WCf)fJcA56T9Z+SE<aSOsV~T4ybC2U(
z@@+u5$f4$Nc^p!tSt(K+*ytbbp<{-?q%3$K(_HR3rYLUEhnM=Mx0*uyb5XkxKNQ7w
zjw~2s70<K)6zA@kTm``#)N)Lra0fqi0c*(VnBa3zv!5KAYq>Zlp-?=lzbn9^EBhaC
zR~aXo4oOsvpz}#9f=JC)Y7>ab0l(gr^gKCz1M0UzEP`vuzft`_WSjPIN%8*xRt;%?
zQ2j|Am7^j!GO)m}7rD5S;&+xtC79;3%2s-xO3~N*k}vd|#z_?Jt82&k2|EN<O43{q
zGtFu=axTz9$0x-!WOrH1w;%=Oy01X=b+LljG=v7mMOElto&<-RpQ_eDr|khiz{eCP
zD>3HEpT!2nb96={ITbXHi~*5w!?RIaD_Z{mjrh+-lT*Bo7zk`J$8|)Qn4tS+H}w}#
zg)F~npDdp>=J1|&ky?LP%loT_IpB3!WegvxvI>#93Xwcx{Jr-_#~hn>ul&_~Xj#ZS
z9~EO_&c|--ZSJf?6WYY#{!g0O`h7pf19SCQ{uQ&kIXza#M2xk%6gyAVXp0oKG8A<i
z1~XG%N~J-5g$UCGP_8oBrDgY60(kN&)yPLiOSb&wd{HuhwyZfh6%=v0kBZ?%PSfHR
zjh82FK$%2kifIs^i9Cvgg?45|ABuJTvoy@e-JYrfuWKTsK0&2m3c9uS1YkaQjGx5;
zaGM+|9(<ayx7#9>bMZYBP_zjSMBIk{CYH!IOb)W)fH|vd2+)J_4ONk<EQlk0*)?j=
z9je6eIW)*9tJR>yIUny?IRFwcek)<DINIA&3TrEHq2xL2R(yzLRZdn(fK|ed=>0Y@
z&ZaPO4;ZS;l2UAgR=%CMWV=E@E1s(!cGMjgVdhBoZ_>FYk(N0L!27MgsVvHEo;;ey
z`X=fKUN%qxZ>rns`jVe&BAHS-HS&K}cd2qn_uihGf<+ojA}HKQqwKF05C$Q?x>@~+
zt;kRV<BHCH(*17>20H=jvPLT+=YwlX`7~E4``Lp90Y{2^&2I|`OiB;ts_d@rMaQ;N
z81q)GYYfNToq%epQQGOU#Uz6)ziO@|+5=G+!aJ~{J_xF-duR9lEtK&@Up2%LW)#c#
zsvK>Hu7i>Vjy$xS<EjG2#}~~zWRiHG?Bu^PX6XsZtqqH+<1y5dc=!u#kB&XLFICq*
z+g^>NRfel@i*C#~;<1+6jFQ|ijY-;jrN@p^wugeWZf(hU_7E|O857#M#Q~#grf2q~
zToLA{zMo{_w{ua&#nDoOZi_pD6+OamF;sVV;n~&?mfq;^+#7`vpX27M7TsXFW<kL<
zBID#}!Nu}WSn8Lpt2{z7y!oq*S5lVF1eR~$d#e*=Zb+CQ_YcsZ^?N9;tr`h>HGce3
zlH!AH_B%|}ZLXpQA{ZX)t|M=BJYa!AdUH>YONG@zY@RB{^7<H)FaE<7RMb{VhbXbH
zjie#eeVDN#6a5NRfv-?VTWa@0$9n|FX&7I~sm;oJdtmt-a0M2ceG6P!G{}n=wjYWD
z*4x>fn8ydY9U;C;pLN)k&+)5hreAM!CJh&G>4{IVvV~EcVuicQ$ApX#=95sqh9o81
z0-L8^hlr6LR-%imp`xpj%E~Y-#~9+PFLuQ+a+x{f6zw}$j^^d<fW{6ejX{KJ4$yg{
zP9L~WlbDhgmH=6L6<h8qJ|FI`rDba%%YnsWwK?x4zf^??T=Ht{5Z_|1CCR5-WMmQp
zkbF?LQL#H0%`W*Z>_OgHhl-lcbeS`^<x924khg3D_HeH~Ju2JG8!W`H)e$@o?(hC0
zyZ0qXR$up~Q*WTM7nNg>F46r|I-J;t;Jnd~90+~aAfLJ(SBl}iv|vp#?!>Z&8+M*;
z#d#GZ(JXQXCj?cL@cz&cDz((yOSGI;wKpMqWy}m7!-J8E&)mkl*fGso!#sAVB4)@U
ztM65P&BLQhhlv7fZz#{|Fo7r`a0M`JI3vYPtUlGxiWU~$gY#WT+lpe{vGPQ+DO3Kc
zu+d;hp)s@KtC~?8s<jma!1<-gyKM~^rtGnj)pv&y6OO7;c`7iBE^}4sG`q*dl>4I6
z>ezq!!m`Q_&n3y03u@D#W;isiC$~%~&MLO!QMyFMyk&(Er`)vbi;s(aRc{=UuqVwJ
zsKCJs$^2Jbvyh++5l}}kN8`;$HO$+#mH3n0eZ`+8C3gzhQSu|YrqtqTB0Go5t_*_;
z0)j`H9`FDgE1klYQQ!t{HVC^HmZ9c}(=6vXq6}|ua?&su&2-;A(wq=_6+s9+$Rtpu
zPT^6=>R;jrr=oc)kLr*pFOodeAWokev-_K|x9+qS(r-TC-~s;tHJ>pth?F1!*sF~q
zP1b`WIIFR+1)b6ys^gyM_(F?zX2~LgLng*`UOWnt&CW{5ai0}p`U%rCEWn_{3W_+<
z<vY$Oi4rv=D*phzLPG5)jEV#)wFs3iq>PSy8m^r&jje(WT<a<B!WlEXc@;{?%AX^W
zYdn=Q9Q&Aj+mE^(7ZP`YQ#!D>Jz!Arg6sx(`=uaW`5i*56OQQnX%+=sXMF_~yolLG
z;}qw03EAJ~g@o8?@(G|)Pc(Fa8Y3GXF`CYLa&pbHcVBgIUu?yTf3;kdkulLIM^L<q
zmR+lcQG?_f=;Xt!qA`!EBu27=%rL$v&`2lWFh-99!KuV3VJg6G#WQli!7-3%yzESc
zQfXXxAwxa5g|K@jg}u_PCsIx~ek$itwpnBMmAM^?rDN{4H*6sF_f<F&%<b-pHskFs
z<?)JLCjS6Y(Jnz`Z)_UWyvqg-IUXsG`+JePR2c87u2_>AGNWhdA6`K$NgK!&zKN|>
z#-nMd!?-Zrj>UNR)2v)(5@LSA6keY--J=Cr-GKWwj(TT&7sof*>Dp(huJmcZXEH&a
ziLXfX@2hm3LL~xpNe^mIHQ)_*G@x(6%`l6UaQ4p8zt}ZX$1f<cN-w5;RRod9pvKC>
z`>AjReAA}-&(ykD0c3eV@M}k5`o*YQfG;faJdh}RME4p!x<Y->M^zJUd3=H9yx&ay
zW6>wTxmS(9s?zD-s6Ly$9}A!l<kCD$Wt7rMxH6)3_eDc7RtTq>t+dwko24Gj+$aA4
z+N&FR?8ywW#!uKZ7AA)GQDBoW82hKJq&5kt5#uKygHn7AHqkeuzi=JcAN160<6$(%
zhz2A#2f9^=9@?vMW2qtb;B6y4*D{URZa%6t8EkIFN@FfLJ#|BCV_Fd~2gqaUnBhsr
zIs2(FP8EeKro|?v*1+VETXPV2+#e#oUHxN$E&X2MP6uQP`b*Th?VgikEcX&B?Ni2U
z<sVq;@@rnDp6hPqVnWuPBp&-I%`c4=^Q1B=ynBNIR=OqUxW3fHkiAu8JHLrvknu)o
z@qus|hR?-zQHav4k>D!S?`yeL*BK+tM2Q;hJagnxGb3#bkH#u=!hr5<;%o-w)gGs&
z0{BG*YWTTsAB=gbeR|-mIEC=kl9EJjm6^R_)T~%V>%3JI=uvkm<an!lnXZ>1$sp5g
ztcBR!z^u+iPb#9S-ZOf^6-a+3w!WIMSi+`q<LZO9vxd>K$`s?VMA>R9X?7-FapHxx
zW6hs)q+EKY_x74mwk?i2^Im4+SlR9}SIEt2HLX=|FGRN^3-wjnys%wB1WEuORM3ii
zvg|NVlP9U(PjbWDIRb}=B!ER&-`ecP(VWw5ZCLY)G;2vEz_FohWK_}r0EyIv>Wh}&
zWC6U;ti%vW<IPG5snT^JC|Jnaz;#xsAce!ToF5g3TdkJ~BxkPcPZaUp+o=b~Kiwln
zY}nM1y7b<r71ksCPgSr7{@>i)!LKIJ?tqlUSngh{VQH$f+j~yr6{}+|4>PLMD9-5L
z=qebg&nC1)6&TH9JuP&GByHrH*dzOh7#|eO=cJUH8rTM)m6RM)q<y)eiw)T|5Ur2M
z#tnBnKNQ)R91t)mzULV6L{1YC@Hwt7yKzkolgP-Z-<+QUjSJUAQj?m7X*Uo$#Y)YV
zJZ6Bokc_BaXqC|ESgT;S6%(?PjQ3EpNZb&A6%#D%xZBs74UZTNyqHF(HHP)CsGzX7
zfARw+I2EtFw3<{PvFFWs@2fovbA2Pv9IVaY8m?`fEe<SCBKI95%5}T>^$V7oW+x|s
zRfCGt>VA>dEu|{f>_^@zn&(Wsw?E~9hCdaY_Bh-rD1+OA(g3}aUZv<Q1A45o{{SK~
zE6y~vRlkee7#q3GdtQY8+z^;>M$ttT>SoeO7g}wsam-?7`3|U?+14+K1QA^uTg-Qs
z*uZyH_cqhYt_+0dqe{$?NP^>0IV+vV#Zlf}&XLG&N!)*kqO9!YS(wa7DtOPDPf^<&
zn=<G=VwNkOO)&0^wfu2S31R$?HJOzdO^>5n80?$&gfQn7n$)5(%f~gUQKMP~EYY0t
zpA>eS=Y}hso@%;ZnHW7$)~f0OQfUzjNl*q35Q@-fQkDR*#w$9qyF<0fE#j^;mMAtJ
zN%L35@&iWuT|i;yx}1d(g&*@nLd-&;9z4;I5CPt!0<A<t&@+G)sV5AKec|!j;+{vh
z5M=K(`?>wgf06@%-5Uv!nqOk|n`tOd8KzwOQc1u4J~K_cW_hF@0sjDsjz5$@a@%{Q
zVvBL_%@GhG!0#0VMplJ=(pV4tQS8keK*Z#3;{t`8JW2rrCV>f~5wj<<rvtNBWCXSZ
zK5{t~pR<|7tF>^X@+$sxUuj$Tj8h?y4a*}&;I4DkWVL9$qFa$#`M?q3xUBxKj1_dq
z@l|9AnYzO$!sqi$w`T-0lBG=%bUmoa82+dz<z7KJ&1GXQ9iEx`f2!u<IHZ0?IVQcY
zOK7hmQmPvQzGd{Q%`TvVCL;#EjPz8(4LNuBx07Ex{{Se`M;z4Gli2z$qEg3SLh3OQ
zdIJ1HnmReHgdsRBLR{kR@c_?2Fu$aY#^3IOoHexg&Oxs%t*SfjHu)^zvW=uNZU7TQ
zYgV$z6>TKL4(ZEv7qji8pCd#F@do8=wJzyJ+ZieV)8W-)A!LzT`m4oBqlC1BcN7(r
z&W*X0@%W;Sq3>>uI38&-$;tFOv9=c`Nl0Y#=7`sY&+NBZ0J08h805CLPjXiS{phB<
zj^ZE?{7obHwInU3%KatgytFBBGUJA+PMZ~~Ga><>#T{<|jWZXX=v_+d+r&&qG#*F0
zf#SD52sFENX$t@W2B*KZGBI_}6cx4FM<(6CX0#S&IW3C<4n;?4*(BO!c8>!IK4*$9
z{wS2;aheh)BI(pNG1Wz?{@`t4{Aya01B<>%-sNqf$iY9FWK&0WgCJ&r)a<0Xv1W==
z8yr+VoVL2Qw<7I{=elK&<GRPV*W|W%l@x`?H3W948#I>+BpwA+YIoO3sw8QX5%X8E
zCi`Gr`68ZpoFb+6R~hz!g|)rQ5g1kesV(e(XSuun>xz2}SsI{cyP9yMpKg&i_F3BD
zoYyV;kT#OE%~Mekkc$!Ljs+U+1-xa591iGWvEDDk2AVB)(65k1vP;M^+@9#TVh})4
zR1PsxTSC^^T1J!^0-SSFQ*VXPYUM>{iKpzol4c(=b4F`ITmXRnYRU*!DMY)54L?vt
zlwlVpr?}@UktHP-R#-8T-)o$os*l<cOZ*of)lXq;@!c^!4;9yiRz_lY^H0;@u5z|T
zFhHBm2>PLI+=)x7VMPxO;yu|wNm@2ZqZm#GY1~@iDI|gQ8_S6f*i;$?ZY2j3A*&Kn
z-!2Z~YAD%m4q0(bno6iONFBpm4TO-Nbe5JNIcV6DJkd816<x86{{U3TQsyYwV{U2p
zI^bW)TdLf;0<3asw%AJ{X$d2aD#B}h*Ex`uspW+uhj0K^n94C!irZjzaET=XLQ7KW
zahTWwIHp^>i;&6=DD6OrP!+)=#Y?G-+epl6ur!@@Re&@O6pkxLFa=7U>XP_kO#N3w
z6zx+I)99|$E_Sy3e;S)eyb_Obl;Wh8DBS^N2Q*fikV3dIj8)Q{c2xo~l8|+c<ZmdC
zX9lb!v$fB-F`5F}drjpPLbXg0q?t;w+n#F#*F11v**TRGMMVP!iP-`lamW;;StuLX
zLw-$1aFI-yL%4n_izE6lFA}6f91-T3Gdn^uFu!!ik;o`P$m=yKK4B5E=Dz07l9Ago
z=W2|aOU6(N=bGYCCm7=&RNJ;$U*Z7bi|7SLsQ`Qu*y5^^H+I;7k}Bs_ibjk-`b7b0
zFSQ}U=CjFD2r|g%rXn*+BLv}_Y3`?Brb57QY8hK&Z5;9DlmZe8=OmF<-kqBgdt`8G
zjx*UcVWnLQdBGrZeACB<25LRcrIZ1R7-B~Sc;=jBX~yIj;+m0`;XxSguLxSNx&Q|S
zXnB==&zA>3^F%`!3NZLJ6qxt^YEC@RZ;}w-Op1m$qwI2PHK-vB0)vuyt!$9S$8kT<
zs_kCPn7zeO$AMJMabYuTuFJG%1G-pML~IIwy-O*Oji|d%=Aka$-owGFh&SAjWe}1D
zN)@DyO14|bsTEQ<a@gEzJ9z@hu}6PYKoK0WTS^$N8|t*SlSg$5?*3Trv$qNx2a?`s
zoib>I!9C1)?wSw4O&eSY)l?1_qxVM(sN}T;4jFkrnx=kODd*~p5&JuN1y6Nt5eGo2
zJU;PnkUdvO=srZ6g56zn?f(E1;+pc2Aq>1xY=rUzkbsZ09_oWzhugSUiMSorz$v?M
zG$qx)z8D@s>XF96RzY#@$(bAgDp{wDAZU(P%?&gH6e<YLY3`_zssav1c&dOt(npef
z{{Ru<jd@6X5uA5b)6569A;$xnk(`yrd1@prwA-ckM&`&JRjsx3a$Rf~Tvm42G)b4h
zC!<V%dMN{gjGpRw3AL1maCU+RifyXf`@~NYl|PCHO1pW2hwVii3g-lTvrV-OgZ}{N
z+FYx@+E70Ngc>fMCSe-755c10b_=wcbfu66;|7yr4UyYe{?z{f+s`7O{{UKuI@YPD
zovU`^Kg8ypZdK5*=OUn<*c=HC?DhDhc;oa-WyO!K^^0<=CBbU9hxJD4G8RN8yz5zk
z?WY@g6<E(HeZc1im)e8y8)vfr0Qu5lgZwA^qV0cEpewdS=Db1XkjOF!Um~ZDO|kB3
zu3Bg@mPU_sw*5h~cLz8bt2O;VvOoy{*UvIsNXQH)81AOD)Z~gm?h@1^q=E~|-)F!7
z0QteRDp(~}>Yl!NV=HdEN%LM_mkiOXj+&&mxD77S2sLNvqAvDJ>d9}lg6na1ST~II
zSy7;+vG*E4<aJlK?!s&h!#-$sR@@Y}LHny=jYV(<@3S!0;w!dtGHG3vpXiSjpt!Y(
zl!5?n2B<YF8Q}&*IRc%FMDc1HVb}&pH6q_hjI$r=s$xbUG2qmYl9K-blO%mq0Cd;y
zG1#M#Kxz?6=vhYdQeZW~EAbTTdHtiu$mW&BGB#5CvqGhBEsHPSI;Y#oY{M{1wEb4H
z)!?`UO%n6Q2&iQA)uGGdXnwIs@ckL5flH$|Jd(2i0Pg<)dI-s}^5V5m=~5#;#Ee_$
zny;VJ_v!IF8j<6YGfPb*WbZdz%Z2Kq{k+gHIV4u4{W*7xh|W($3O2{nWc=>3NbyRd
z%kimIQIof_lJScKJW)%gFhZqvky}{w^tDjY#gFV$Wr`^q41@iu)M<|klMQ(YTXH)h
zsbs<o4ngj{8eJmNK=^Nw9U95%nlzBiaJe6f3~MSFL+yZr4dl@lH*Ex~8#lnz5=O}7
z8Ftq!1>697^GQ}ltr}S5WRWhR@mJP&hUEmY@JI5jhL><v{{U|zXXw?WGmAO$gzRnz
zrB0d8)oJ%5(0wnegp@?b73kV+_V|DYAlHQ@>Z7cX9;?tjAFf3-0y$*b<E)yx617K|
z)@h{~Z-MB0m4;0~#4~UxNG?6Z7;MyxN`u`!7<bs|W=w7W098gL&jyBa-u6!mPYeO)
zswi35KyGQr10yvC;ClZ6YI3$g&vY0Q9FdYq@lO4$WplwMnjG;@-Z6@UDI|!Es)NGQ
zTH7D#qd`NAAMT_Z2*p8KWmDRd+VS^8$!dgQmb5JvR|kPY+}nb~0;gkqqGmN~INBa#
zU-+!--kXB-A9597e-*L5Ov|^hDj4G^NE}f1(V{x}!PTB|s(zeU+$nos@yIA^T{`~&
zMo|ozkr$?~QHnI&cY{M;+gr)Prs4Uj<%+sGSiMqGjZN8yw7ze1E`+UF#d8ZNWjHks
zy{CTAHN+#2BD40_lLy@v7-z4lp{hDq=W5Y?sDmQ=fwqy|EvL0}4j6Ifmrju`*KC8F
zas?!c)&%Yth}CTx<11EEYS!UoaS+1dv3kw0NeK;Gtwq()nDfnJb(o!<xH&bhFLVvC
zm7_YKbB@JW>HC-*KRBR3G0Z_9-A!$9sUZMkG>CG|cMuC6jagdE{{Xog4sqtHqLgjP
zJb9}PC9^Par)lo4h#EAujXSt$AA!|av+i}<&U>o6ToM3c7xPBKz&QZt6>=bRIsLvo
zebA3F8OI<Bd`-DZ9E|<aUE819HpAkY$YiglEpa18I0unZTT06jz}gs7Fp=Cce<mlI
zmg3=1EFlJb(l7{tll-9W0Mf{!K+;Iq`W4W>?UAy|;g3}X<L^)vNIq&1p2KU*nK;3#
z$xi2AmO$}Yn?_}s0pljM_OZfX%z%TA=%Anr;Ie>MfGU?%jJ#lX`K>Z6X&Z)4JXIdB
z4ZLMghRr1U7{+T6pd|sxs>D@Z>ho9w+sI#)Qbq}GYa8rjXniL%aOyGOV<x_V^zm7=
z<wargTKS^RMvCA%{{SJ@E7!kGwabaNLgBdMHRQj^bW+QY+>e9NdM2etTN%1-GJ9tZ
zlN@I>l3Pu1`vJ%1udbjoY2M&G;;C(?c=NxZ#eDf>m!8jMACzqZ$)!tZ1fKTpYU0*J
zSOH_us8V#4jmInbsjdp#D2m-c`=dEIRfzA&A=Rv+O`C>%@lo5&W=lz&W}r(eSO@vj
zl6=wj_O{Y(_<(|u>|GB@AXl+VvK`!o&jU4SXQum6g0f_I6zk|=XaYjOF#4%9^p#Kt
zY}HfCCa9FE>|fm~NG<X=pB0_;4ZZC8gex9ZH*#p({XRjh$8{X11F@R9i9-+=0)I5o
zlE&1RA-CYpUFqT!3P~(Sb#bCYBzBui6U9fZs8>^*<HcBM#bkhEiW8fX3w&}@I><F0
z#Nl(*TFW217yDE?ouLz?io+S^x$jI)s1TkhTvJMkDYrseZ9-#uo>HbW@!(VrRQ~{%
zBR^GDf2jzkRW2~c7^tG#3R}lLv7Tc4cn69h$&L(?Mg>i6a}iAK<H@O&V7Xo~wyc!f
z{*hXXi=u|vH3=Er@?*~wO5Xb^wn)f7bZzYG0_;{F(59kW%LeH-mi*LnQstt~QnrB_
zCcIuw6U|L;W?{jA<WfjJJ7UTHs%t?!DTkrCrNA!N0!p(o{P>_^j%lvFw1t?CjcIht
zCb|CrB;9xwp|jH>*f9_U`2xH84@h~Vzb^>nYCTC}dtu8w?(6q4NwLwF6=iE9rMC0W
z6|lU~P@j?lr^yvbs_CD#QmM+3#V@G(N3joc8{mB}sjTm2MaiCq>>H-%gF|aTyk!%%
z^Gue};JGX_$)gqnGG)H2i??G^M@~|UDfG=Z*iz;96HdF2E+k`;DA^cVWIrI&eUWmZ
zm#VeSSGj>S$3V8t0B7!nnp<^!ig~G|hiKR+acGGq=?63-mD?kcEu5aq7h#e4pzrl3
zc?w4#`_%JV&dZU)R4P(72vlVGHDos3v0Ab#X%WztA376IpJ_NeeNnoYj@m4)3m>Y_
zJX235a&T)#IH^6}1#}y4bcq=ua>k^N8JINrz;6{2TZoh#^eWQK2D2=FsLn}pfVn8{
zW5{RDDwgCH&P89!fXwPn1y^cT#S<d6;hN|bMLW0y1mcK-MPLA+ZeeKw3fMI5HaV5H
zvF5foryH_vc3eSa6Q6LLJxLU|QOoUYC43re2`Z3D6l&clA~LyZ$0<MTvWVYK{lkZn
zL!YeCcGmLSwo@g3sjF{sBL)X(qTAg>z&zCwn|xS!Nb{nM<dzty<5v%YPAT(D=mQ=F
zA(L^)>lODC<iv8s9gaBs(^fy0N$QD*YcNsCrMr({$80EB0Cf~CZom#PS@_t6U_b<U
ztz<sjibSJ4by=-y*saqTl&YSERgrQ)7m=gmJX6(u)XDCeB5jzHf+(v}ikM~Ho`9<C
z0I3@mE4Khs{{YjH2gLDH+!g}`O5?>2126>m@lwcWbX$WYJdERtuzzwm&e8m<JEmMU
zw4qpSR;@H{?xBVVtCXz~0o$yGZgY<!kBba&SAc2_vq!nW;)#Kybq6^(>W^Uszhr5E
z9~^yCIoNV{aZ<@_ceCf80)|#}cHK%sk5qzK<@Z+@J}SK(S-xxX`mG)HsgeTWvyT;>
zc-Y7@!Oc|TOoB)cetxTxR7MSq5mMZP8jN>}iI6?M^ZdT3Q3;kp>_|DyBq)K2&&V2V
z3dtVezuh$x4rNx`_%#s?KAkE_01SR&kA#;w&s~FAIBwZU!{F8Zg~66H^2Z(3(RP4b
zB)sAw$nJ%l%OsnC;*u+jHWZAG4H&MCZomu^O+y1I033me?8*jqfTyYrnUTtd-Jca<
zWUA}7X88E2U@NtIT-&UwIrEC9E~$VC$?r9(mnU>f%8Jfjz(X@I`Dco*NP`p*IOqGN
zh)B)J<L~N+f!awnIlvTY18&&(`!t{=GY@1$TXO?Mv3VnnfAK(Csw{{90Cf_{Dv--3
ze9&PnBDj*uWyl*(b$e+#P60!l)-qO=#^TvDW|62pq@g4`w{`fZi||V}YGjD~xy?vq
zmE-v+1MYyfpWbBz?gUX1JF5G6$UPc;qgYHAAGhK2wE3pJ-@4~-j<Z1A+_7~+Mg<LU
z^6vQY^k{A5HeAf>JO_I2Bh6E4O&ldmapIQN?qrdW{c%sZV>D#Q2lr9pK-^VNc7%l|
zZ`~tZZe4(6FF)N^-Nb#?0q_ChnQr$oO&oEq{&>fluV^f=dxL?JRZkVcr9>c%j|QOB
zBc3s~Sj=sl;EIpmmP80oH4-J47Fv|BmG>VgKhsy%myz5jxV&TPu=gt91G_(BtTap6
zA$3wRhM6K+tGtyQBLknYQpT}KA14PKP&g7W&&M=iWB?`!KXlc=Y!>z=+_-Kjb|wUD
zp|jme(QbUP_|&k%t(If_dZ|b@XEnV&paMuoM>rK)k0Lht6T027fx%L|V;t6BShHl5
zxH*h<j}(<ae3fq?&mDQEM5-NRQ_;;1(xkGidvHPI(Tvc8gkAus(4kX0%<5F^BkRps
zTU<yZM3K1gYdagV=0b6bBwdEL!EQe2Qwgxr$~Jyr{%fuyP{ow)HI&n}Pum=!J5L0f
z&_Q%1j5;_u^Gu>k5AH3h03x}SRX#y?8XO$H8T+W@1tiNp;o^?M3v(jL3q}-fs$1>$
zpfEslR=(_N#{&*(E4zPsBc4gAu>j1y!!+Rk01@VvU-HY0f<EakY}z@9DnZY5*&=B0
zaB)JA2hU|-$RnB|xw>kllhf1t%&;7x>aSxy=uqPr^F^zLBc0glt8he2$H}1T2-ryQ
ze(6#v;vU`1ZMb>GZXuikHw=T_A8Tu4ba5nV`0kbBzKHo;m^fh&#qJk4Kj}1OwbR8a
znA>R|n%7@wx@@=tGQU+>ap|biP0KG{DLuA~S@hfbFFil%m5w1D!6Ap5)miIsTmVFh
zf8M;bSx+3k+r)Nfx;Eb0Xj?Hz$|;nelY^<yZFaalGUDwIJHh_|dgv!s&f|*6*cJg=
zNeddag6TRZ6!B*>CMOnM2~O+-Q>3^|V<xFq<PnuMG*>L2<j6EiV-}fgvlRn9(uq}@
z8O2rp=*Ypz#Wq`#7?nJI)FhKgx%UFT+|<H#-~-K2CE)3eNG}r^Ug*#l;Lv{Xl?r@P
zJ<6&UV^j}%5CRxy%|mmkDL|y-K4?-TT%5Tqx1-3!^l3l^PCKfYZuuWD!jFpJHZVpg
zqNzo$!ef9xCc0Z<0bn?(qmnkp-stG!E}=mf^HGhBcru#3p8I#o<T9S?#H7?)OZ6nS
z(m%klz%}$Py9O=T{VC$UUG?n1`kH4>Hnn8Rsh+RV<-RT~^!o*nI^00#1ZJ(~Ke~ZL
zpdY-}Q_@$mTxjuZZd72_g?;aD<~TlSDB1fZL|1V}q(-Bk6-ldUQpk5N@&bw0^=P5m
zke$vCCZ@K&ig*!=k8|pz8(bQ(WNDsvsoKdM!Q3!vJ79{$IPt-)x2QDOFQRE7O|t(0
z_|`)1)Y_ks`F&NbQLuWj*|k6>t#pSgfyeb!aBc?!ocC6CgW8oO`!w+bH8%61Lacxs
zc&Tl9aNzC532E%)5_ad4nlQXlx~d#)tCIm@uCoOlW6evD@w{vGK)lJbYK}!Il19qH
zr#o_K(G2zSs$4eJ!{?JjO@-(AG4g5Gle<pQu)_n*HfN4UU8j-QpaM9!I0JF+pDor#
z(;p+vD<o+$vN#0zscuAN3K8=0`Jg~}ftunopFLK>Hq1#GUO?iqHc*tfQ_exHv_?PL
zDL4<diV7o}Ee^t?@mcRtAVpFE-T14fmwo|~t952mZH^g%rI1W!uOn>1P%4Bm-<)E!
zI;c{PN#>|94`(>6k{JZr+s`A9X&mS1SFZk@!~0kxXCEWpYccw5s%fyRrSwX8z$8}2
z4NAlJBlg8WCyMhPx0-W<#F5?fym5n5v(uuyNc8YukVQifvXUm=Kak>v(dCBf1VX^^
z)jv~JhY1?(!LORQR!V2IgZR=jdb8d^b?w`sG3UCp(r)Eh6$*U3njt;6#`YPhEn#Q5
zCNL{RrxzNaHt&+c;TG{#9Bn^zmlhJkZ-{0j(D!rPTer5c7HUW@FBAU&QW9$}*;fUT
zY?ja7h-P8e-A5*>>vQd{Ks{ooLp|FP(G*|*02-ds8&N;D@pJj5sHgHC{EfDd%Hf~+
zQEwul$!(_<4zUrowt214mIELPrqOPv)D-*5L+2Q)H2PU;9YC)pSmTSEd?9DN{l)7;
z8Fz8`s|_~vq=E-htUDEdqUi9Vd#a#0@<l&T(xP14D&@z}uSGvVOBWB5n7vs_bk_wg
zw{f)*hgqjuvQ2JC0Jb<aCZh^XXiG0tE{(JyMK$H#Hy-;(T}mz-6&51wY7?cy5QO|`
z^XRE@c9MI9;O4Gw?SXB>gGn4RAPn78`|j;<OGFdf1c1W4WMec2nQuL_GCa)5o+{MK
z1dp&c%grqDh|??n^r~;KuzuCZJ<sh_Z*P_b2#U8Ln41IpsU?WccK-mU%`8@EgJe&G
zMN(XaGFxH$RmmZ$Nbbb<Dur+f=AnB@5pg5u@lpH2Csf(^%~^&}jzuP{uOuAe(tMkZ
zKiXAA1#3xXuwh0B#bET7j!3v;xK@ixx_BEh6SUXRFwIXIl1Gut4YFvxz1q-~6`A!T
z1-4Q@kgXKgR^~0<-~$zv^*pi90TD<zJ}SLxjIc>Rpy^v#qGqm8IoxrI=Zq{H)x1<1
zfR$m~pqhhCmPyr?LyUN@0Fy_q*qm<6_P23&B&8xkMrzAPX`;9P04XDin(YKK`+-Gq
zkz_zX0<=nMQAr{vN`aQ*R=PXajPPiisO5DG!kH>%K~Yg$-N>-*@+n=ms49iHypbUh
z;-ExEJLEa@Lo9|)W$L;{GAT&7{MFlN1ebdxO@L**Q1<Z%xN(C{wYIp1K>PQBP1G*c
zNoE){%jt3}Tms(`T&kW86{op^*vBh()l&ZLr?|?gAbFy#*{){_PBzwwV|!_WHPa=$
z0I=|LRr;>)ZyBo_#n~c;{{Wf;R@wmB%|0H`uZCyt0aKCPEDXcsP7N~m+}*(9p|+FU
zJe*fbt|Z*7H8m6Oi12^9xwkN!NLX&Fm_2|(kO?$=Hch-fNJ`C>L;4vuPd2R4N+O9%
z8amS9r%b;Fhe2#52&fBv(Q-i?N+b`8%Po~j4_;a44*vXLrz6ct0N6I;HBF>k7~I0I
zk>al5m@95{1d97|cggZ%SmP?$=v0%sg;K-g_ed@xkxTWD6gx}q9hl1apxuyj`%Nb3
zdHWSjtShve@Nl(nXcjq0=>1aPLmZJ4oD<y%a)4vQmnBFhv_6HfTg_k>QL;7MLv>Xb
z$hOkqNBh*eZRGas(!(iCj2wBYTHuWZdYW~gTAaj)sHYemRYa}1hSC7@Q%sW0BTA?F
zcloHAEvtYzG_h2uE!oV>?7VyhXtb-qZNypm54xm|C-(E!JX5V@_T?T%d8doy2DN0&
zKjp{!P>(5A{{R>Qk{MD!s)V+F>3y8YK|J`YNp?IJW4k>4P}7Mce6YwgWvXssWZ;gg
zr;st*v=BVh?SxBY*t~$ij;f1PfJJa+Md0=-{u0~vqVPv_1=;rIJD1!aR3@y3YFLRS
z+dF=!aO@i&>8njbSmu;~J-l=%6+li;VD(g@5_^g5?If;xp-hK<)72Ms5Oh-527`3T
z+M=NcC~THqe-(40+z<UX;MF`}rUr0Fbu=?=op$mMW|}UT4MQT1k0f_f+PtcPSbR^K
zhfen9K;^m}Q3mLHp-N#F@mGZeIEmv6wN;0jh6LV74h9V~`OIYKcofLnY~)~oXrw~L
zQ9|!yRa&$QDi==09OwK*DJK+`fg}vA-3G6M8JG|&64=kxcG+-VGsx^wx~;FZBS^ha
zt19~eS0TUDYKEF2)~~qWe=>QarNnqFepu?OVJNZ!jB`Xh<Vn4NJXE3%-6ObT@k)_e
z6y%)JT0Bz742^|7ib=hrGYIe$bwGvwgM6$uw~ngq*=3LUbK@0>)8|-a!9Dj{i#ej6
z5MpDxu8XupOC({ODb7dLB!Hje<H4!I2%`5Hv!6a`U__HAcgmeI5C!)E!r_48y44y-
zFc5j{(F*FJ!5w+1;G1p0!2bYxY9UpvSuL<GIbl^$%PpL#a~TDc<O<YkF?-S-{{X~_
z%xX|c9DrjH=Bttm4%IzDajHPE+}p;iI%OQtDHtJT&H%27;t8|>PAVyyO@Tld6<Ug1
zAYJwoI7p)Ra!!02MxHE2@Z>P({pfjL{@fD8pCHq1!xSxyDd^EihOb4`KXkw3AUajd
zA<Tu4Dai9!iz$)@8;%^FMQgO{gk@8_=M`e*63Yi<+p{A+Xjx>DKaMd?Xz*S{IVAa~
z60ASSYEW!?$sy%bGUJ?3^8}VR4nNwYvyh`QDfoG&xrjR#ZVyywmSgoRaTVDo<DRH(
z?89z&A60*@+6m$J20lJ&k{{bGo5oxGxz1|0zy=vNN9|`E`L2?0Z45wXj_7PL0Z8-w
zsz;Au3Li9R7BQr9p4j58bQ{wSA({9+Rb-nLG8->TZmTP(X4=cnYA0Nf+{rsUDph*(
zOpaDo+WvXws<dnKH~qUDk;ZDqTo(#XaaJTiiKFEt;|7S&_(nkd8fCnS@cT@o@M(!0
zfr_*6JWyyPH<teD06@=*qP>l+=EIQ3>a^i^{7A+*%?+yA#d8RjH38SZ%_NZw+e$OI
zkLU`L(oMj$yaCk@aG^=T!w(cOl13XcMtY$_mZtPC!~j9_QrO+JGVelps)CHL6=BHp
zU2fBu%c;ta=ur*47hk+ZWZSuW6ksasH?JeQ#%TAflM@*u>{f!)QDUV?1ez%mu-Q}G
z$lAOfsnEq3U>Qa#7@vD?EzJ=esLVF6BfQbrD~<L|BoUMw1_f7LF}7mQoxUo<@z?})
z_^I(i>PYu6h_;GfY^Hi_bN$E`4M|+>1cCQbwf1;d<NK;h$fdV!m^q<AxK{rF<Mm6h
z%|A}VF9wb+P2+sz5%o=<Rd#X}SZ1krIt6DP^+m+YN4dcQk-*O{OM~GsnY9^WJci8$
zaq2aPD(|$_ntMh#<e5QM;-7b;=(k+7O3XzHmw-poaib@3>u4gA25kQTy$5ycYio&>
zBangTn$b<@Z4PsiyGg2xuhUyC<V7(A^+_nq(+^I;713#?Ys%1rzbjVO+TP4DZ;fSb
zeGh+TrBdXu@H11tr}dPN3zh<+a$XD>{UJ&GO|`$NA&+ZhaaP)YtE@|IX_0>!uOZan
z)$H<;!f}qpQ<c4!I4+E8Jdfzky&7M1dsnQzWv5%|Qo78A*+>8y`M+3{US3N#W!RBd
z7g|Q8cG#9@AapZM(|tX21?n`hmq321naguKIvp%@k$l@`%k<94&Y8ARw~jMfJ#owy
z20g<)3{vT74X&E4jerjooY(FyFXkfLkc-tPLvuM9tzd+@3XDlSnq4~SqMdh>wu%~T
z&c&o3FS;TfF%^K6fmXtCM#xlAe_~cxcPSV&7pgQk?ybGp2oeF4P^bNy4`yBI^`5HU
z(&Z-k9Cg)S#F27#dC868o7*Q3^-@67vxwAZ2DKie(OTh6?WABb_6M5G#PY<zxY*gj
zq)Ld*6bEE+iBywE2tXl$+nz-OZdt8_u5*FK8>dKa2FZyWaB9`i0--7sj#`YhR{@zu
zJ~dytpc^Ay-1cadd4zdpz{OaIW-icw)MREpY9Sdn8;Bu1nx6VR+_Z{88Rn$izRuI+
zbSN+ZU8vo;8?LC$K4x4@;az~n4GTLBn*@B1H43{#xd*Ql8X<3K8EfT{l;E7!mG-*)
zIKZrZo$Iuyp~&X8Q6d|3Y~_gNj6#8-h#@2{<~>z*sT6^jY}=7njQ+_|sv8wgtvvTA
z&JGvoP>@Vkra)&##&cAK;|$6cudkq!EY7`EEck4yC<J#|CW3^C*$CL!2DBP&)R~M#
z$9Oe!`dR6H57dnIxA)$~(HKGYYeT7ind=^%x-&%&?k&OLhdft1QI-_rT$}Xxrka<7
zWBPpZ7*(V>d<xcUw#WAAAQ^Mwvc8P}01!~hRQU(RX)Z3VB)Isl=+sxrJwZQxp7Tcj
zDvBxANE>sj1*0uB2%{*-<0gr_oiCth3}ITctD@aoJIc<*4_}HAikhi9WzI_ZHa6vD
zjioKQvBd>#cm8HcGr_2|UCS5@<k6e0424eJ-`!-CQgl3J#TFLV(37z+T9@J^-rxdi
zrC|}w(#GIa?9*JX&`2M2n-%aE_$|%E424Myan)Dpu`m5YD1Z#m)|Rux>{W}^_KPK)
zR&KW=Y{I*~g9xnBc5^*a)-;i%)50PGO4Qe3B#7WIRR*4r$!?M%$TU^lAVAD80IyrQ
zqk?3wS7Wjfl{H(ZL+o5R@W+a`)MkSiL%H+KP~F^^=L8&c)p(CqQCf$$$?5$)N?28v
zdW^Bd54hb5tT)X(M8pm%SxCHy`55Yq&~zE(Lg~tVRvkVGXL}&6nkP8nj_I=}1X_%m
znPwXkHD{~b%WYyGvOHGA`hG&a3L59q(@eWa3@f1hGgE2%W_Rj1W!#MZoU%`5vVn|`
zG@itfNF*68^;#%>J$npZ+H@+<)Bc~3T?uaxFz^j5JttE+(~Twx`i>Yz)U3x`J>y`0
z-OhaCrduMBi6&2x`|)0*Ev=kO%n8Thsx?7o(UtZA&2-=R{{SBum=~;H@iLkOMOnyP
zf@@o+EHa=+10(LLV^IR_NWz0e!8uMsoY!|vhFGyEN!v#&K2<rnAl+up87$s7LkGtM
zRuVfqyGw=_4#qt8Yjkg2hR~y`s??!-vSTsvRO_@~s)b!L-6pTtmnzdJE(C6bDF=$I
ziq~z<P?7kp6X_2OCDSdA#ZhWb_Kq2sZclaO^&Xw5)2ou;?&<Y-^E!hD)nE$=E^&&W
zmhloqj1oaS;<U1rzJRY|6;BO>B4QZ^bHS|G<r?JHlZ)gq%?jCqFa|u;dmxkNxDiv{
zKs~b=>YJyeF(FBM@M<QKv>(Vidu=DS$}#s#c(KJ15dojYKI%(#V~#$kP#B%#m8&Cf
zmQE~nv7A6b;;LtXq@9({b4T4>vJ3&A>YZ+pO$m^kH`QAf<H)iBdq|*&1RVJ^%{-)c
zTn<e>{$Qg7^-$YO5-E_LYUG@if)DIBQBCcds=AMIVD>7)DVpLk_&5~{qA)CesN-%f
zglU=Uq{5JDtsy(e>nFun>JopEw)2V(JpTY9d9I!|)I+s0NNt%U4p^L3q&F85xP^a>
zQzgWragmg(Yg@<$2=0?AepQT%drJ+cdoe6&_tyU8J2#P4-@U(f#8G8F>d#M@%DZj`
z4Pl&RACnzB;PF@#M&d<o1!(MLj^Z#$=NPIRa<IyzfXX~m*5Iwtk~6h)(66+{RD7O|
zB#06$W4h#0P+(wv(AJk<xKZ*L`l9~;aDRYNv{Ts#f{3Kt**U4DC~#PDQM+$oqyhP<
zC&*2s9QmPSE44dxoe5RP6-YhE^9qc8)~00(XV&cbsvFx@4elI(eA3BL0hAc=&gJ^1
zF_KT>Jkp>lPFwzKkiK~1G=L>o=SYl>4&H?#ZDag)znYfrdCj_`U?`vi$-j3e`k)g-
zqR6iMSd5Ppb;~+UmEela+e#snI3zV_EoF{q!^qhLWYy7jh>4s^%CeJyX_2-hB%Z(X
zOK&f__)tm?g(Rp{KIQz1BoQWZ=VuixYwi<}58Ve7KFz>?bak@?P~ZEhlCl&U)w+pG
zsKFj;p&M%w$NDv})U2bLBeb2(SzD-#QW&<AwyKn9fWe){Njq~+j!X@{KI@Bv2~eZ~
zQ2<EDq+}9Ii4HPz=A?nuTjV;fjDQau{%SCUxd8J(h31ibzUrnhD*oa@v;<G(ScqJ}
z;FHKTtkZ5q%u(cx-fHN(Q~<=lP_4^-Q<O+$4S+zVOBCMW3Ny!b&>2+YvMR;MLs*#@
z5I_g9ifV`fJ5+=BT>~;U_W%Rpog+uj0ZHnC23^_4)}^21SXNqi<$>9DaC+jkkTZV<
zIT`aqdV&;{3qpr*6q7_Ui#{a4Tpy}XvKKDOaa=!%R$rETqhOh#w=u}7%6y*cvI0gk
zD(4|6>20Gyz48I-ql=l<g8Y7}TcYyd5&-B5gdhsA{{WRve-(e}i;|JHKq0!SDJ{}8
z4DXZmQ$W(fM39^+_@<1gh}pv=dye?Y-O#Bm<0&>z0QjISF1_nznOBfGqaeB88840A
zD(*WWEzZall&0<78dxEWz!^u(_^1Q>(SYX+4NDp^k8-#hXpm*_Z*OTEaptM@)ic?u
zDCK*rw%;hrV?HVit4UdLv;{rXt_Wg@c$*mUObsA81d~Q;mhxL&nDLN$CWC3p%CWNG
z{nbDvZ+>@GVYPnio7{=Lw``9Xr!;J(NRuPFdJM5HGx;?U20mOXw**!GgLcw@iZiyV
zrC3%JJA-GjMd>rg4eFEyRPjv~Lrrk<6$rT%#2kP@KXoKhfTi)cRB*`6C}23N0!eRy
zG2;u0is6&o#(SbJk+%sKh4KYg-8T0d2VQCrBwB+JSL6}JW3?-Fg5}g639W=uo)?y3
zRUV<YSz|JyVf@gLOw95=7C6pUn-hRQFVC86XxrN?1`a5L3PhpX1Rkjc5>X$#83Egh
zR*p9S#>BDkDY7<3Bc-=20m9(@)MO9SLt4wvpEah0M=FK~1di()A)I{1aaw%>;chKL
z5;uxyvKAIc+mxiOf#A`Fk%1ufRW}TM;HUjGTm;GS{yC-zDoD>SCjbhQ+K>X_z9+hD
zQ4ud-flr;-#!^0o217gQ3uIrMe3RZPpsQ{~+yL&i7ZH|;flU5tg711cKF}OvkyW)s
zpnI9zw$MQXx`0aRHbxIL@$e%fC8?^LToT;orJzwYzUbet>{XthDHlQae158^N(|U1
z28c;MKm&?|qgifO-2|Ki&M4`^gxotnRZC+$b3~;{IO8>P3N^4?<e&Mc2<W9UT9tLm
zuSS*_WifA1NfivQEK#gL3>q%VR*W6Z@WltwD#V)EE}_h-0}hR5FTjdsQ^2iclBfZc
z;ZNOXJwdumNT>)M;*qOlXaOWvz-~XfdXj{WaNSjJy%SG@#BquyOO<W078{2~mW86q
zNs>51mfU;~s!MCXYGCdO6ai!qmU2V*t}+{h3Zs=CDkK^<I(&<D5Slz;dlXAyj`v^a
zRzpP8E+Cx8{8XPcp?O(`?fiE}tTYiL?SF!i{hFBHp|<_GO|E)s76ogcinz!f(nhI<
zicga-MyAxsLvCY^b=fcvo+~G;+M8Ry;1)mXwGs(cNyyva)n1~Xyolt6E8QKbgK14^
zEDFbRaD3D5Zk<#J5IE1(19R}0D8T$0oKNkbRmN(WVoXPhFSDf8?_*zW?VA)ayPb|!
z0uSI+@?JS`7zU%PZ(&dsq6GcotM-1y{XBdyJ5hHr7=?%XQF3Tk7m_aKC@Vpy+``fA
zL_mJcOzk0T3=`mr3lc4cB#I``64T;x{{Zh@b!~2_rWnc|jb1ytI9Uh#nyu8L4&~Lc
zj{>Ywu8Tn;xodI|@(*CvbJiMSl{dEE#PUF^$#%C7#wk^Vw$fU%1`IefvLTeTzB5^@
zYB9896=|ltuA!KGQ@YQyHMQ)pI)%evRh5jgY$_P-J<)>|m$8#^$RH1@CP>2D66ZWs
zY!Z8GkZ6H&^CKxzNExe<3JZ`u=h*z`_}42mP8%KgG%U{RcG3Vdo<$IGaUj^+Nc*8+
zI(eU!P%>&LS9*}glTt#(CjS5^2L}|lD4U&-93F`AQos?U{?qp=vZo-{t{s7a1mhf5
zD#)R^UBrXpw$QRH5rVvd-4rr9>M5g(dCtb6FXf40P@H|!ueBHAHvT_V453cPBmJs0
z5{FVTvalQ)$ls_(_>4Art){UYQpifFPzN=N)ZB^K5#LppAVM@tsq8OeyM{(aMh76*
z*B_-muG4)>pha%w9&}Ky80R(d%uL(1lYxrA^slJzv<s0njLf(^fP1cPt4;2!U7Ix8
zk8RWF`qx?XqJHSy*Z{stimw(^Lj;@SDo5gm{WASPhh4CWIa~br!wlD<qFdY{_YfS9
zs`48D07}QGmA)i;JvOpf*BqnBguc}7-JH?sckjdP$e;sPn*RW!-dW_iwQZpDio|M-
zb*9Q<o;Ls<>(A;vJ1!^6J6QD?^M2RKrng%=Yhw7xBZ{W|f?QoR!cv((bTy`=B?__w
zkE*&5rNy`rd`arCNlP2#(-lHKnIj^ExX7!U8$e@7_X2t%g3>JRV<l%`SN%1Arot^5
zUQwOLjMrb%{W)qs+{wyb4t~E_@%$m#BG@gtk7^QIx=n9I*n(9ZjMc=~5wx+#8iMWB
zSxp{o!ps54uSt(hiyQdGBa1BAC;OR`^u5uFV{I#O<29qycF72eGD#zxP(So&{oimM
zqdCo41xs5}$O%1U)s8Qsw#@80CRx}5am`Tb+F@sADnVNGJ8}LTE;8NJH`;Q04TKEL
zJFb4G(=g^%l{0n^P|1b!$}rj-N)y{F$>O&feYm!?R6~sOSg_OS`k6<J?FWO7D$?WB
zVh2s=#2z@Veuo}C4oQ2?pO5I*tCaO|Pai1Mh+Br;kZMsI;IKRoHC3l-w)fG0h4#16
zs|ckn5s!8W9M^vyNMw%aJ4YgT<p)wg^Xhhp#xXjH7qLZYm%`v~QW-~)UTf-0TdQwS
zw4}I+y!fl%OW*$9e`vl{zctT}y}eJc(XM(luzRW5vDBvv7sjcJ?>{Pi+|4Ao&lTGT
z3^}f;&#EJ6#_X`*RN06}g-RkwgF(Hp2%#<Kjid;A%|K8^tT7Xo9GWq0#|LO5?w_(K
z?YD9M)f!6e9s&NTSiO<8y8i%?NEjok%X*_jX>A$%0OqvlE<Mfd%AP7Mf8%2O{{W||
zuPRf6i!H+^38_c3%WWt9yr$ih=b=(xMpZr0UHtVOrAF-VX?4aiKtLln>bwk~y~;<f
zoZE_EDKGB63yswKh(u|EIjSuxTgAL=p$boRX?=Mj0mkgpylqO5&C=is>NIIEPZXvp
z%I!mh`=@&o%A3IBx|OGB?fswzY4><QqP_OQ_-=DkU5L`#bZ4A+r^RLapk#Ic4;2;2
zNb?vu08^%$BYq6)rNzYL5_Z(40~07cRgy*`%khIi-myOw?zOCq@Pv;nxQL=y-~pUf
zSobB~ql%u|LT$o6b5Xze+j9Q^y;T*pjNdd-U0Sl1EGnK!Rb(i~6{yr*a9Xpo#|ybE
zE2V;Vxf0vxWwpd+Q651Ct}QI!4esP-sv?FQ@BlPboL2_qDSz?P<mabDFUffw@}r1X
zAMr;`G)o9<b5Pn^+pZXZT8Nq`%HWPGn>QOC&l!mVMV85L6%j0_B{7;h;Ur0bkvXMw
zMo;p8#eJChOF&S_@yH%F#b~VLX<`gOJXI6Iq>pJ1Xx%nx5&fnHnk{q!+AyL|mm-P6
z5%OQ^p~D3QMnR*DW#CSUA5~!pkrAR~3MfmfK_#;xIHKZ=B)=xd;+Y|1D*y-vg#^r8
z+N3|pvPV@OM1&By#b0U`EMg^jHBga-<962k)lYx}POl*fP8N|ZaHajsW1;s(xkhzY
z9R1W1k|a3(B7p(!6v)BUV*?a^nKR%;w=@*lR0BPlmeN*&;TcAA*{6>T28I~@wB8$O
z`lNZXa>pm?rMD{SmLP&Z;-i)Z-WhortE-X`Ay!sZI5j*0nKrQIho5>$v)UjNM6OYj
z0OZkB8zOn6LQrxC{wk{e2}2u&UEu!!Dz}|CO_d`*ROsQ%PaK1TLQ%9s8txR8W$vPl
zSh3s18>v7bxNyh%p~tw%{xz)R2AH<&3VpmXHXo{cqannDnwm0JJ)^pSASylIpH*RD
za3GKj;d4Pv9H0i;araY{2@)}2N%2gb1lU0&j~Hm%Dn$_pjDL~1$Np-IMY@rQw^M`p
ztCWq*yAL0Aa&|&nfFv0M0;7&S?4n+Q;L?^~ag~m7^++ZGK1xmx6$lrYcVLdZ(n_ol
zvv8Ei$GI2_%Xdz1ahnnxsO(U%lvmb1+>Gj=hU8UTfSGoj@x^H`m025MJBLE4=C_cL
z%rM_oOH=|BigY>8^eLX<XV}Vj4>dZ?AxsoMA2f~b2UZc~k3ymV)++pCkxFJELUc!e
znv#2Y1BY%8WPMX*Rd-Uj&)qQ#EhABATtw(Lnz@vNwWAw;@mV<cqveiBJXWVpMwZ<q
z9Gr40*fc_XNi<7^+xenx5!Fd6yTvs@3L6Atu|@*s-z$uHrU)!xw2TX5AI6B8CorjI
z$NvD%p<D?79Oj*J&y+j5JyfC;exkOK;@$@%&1TnOC*9<Zt95?^%O3DX)7@h=NKzYy
z+s`zTRLIDLzabc2D5hp8HzpKtC|Feg0J)=CS>p&fG^_(IB8^lspO@7rV~hsNeb*hM
zayY>sbUZ90k(eHTH4zPOr)GPra~mI$_@kn2l9a_}JwI_QcR$Mkk9Dgt6~i7+b#i15
zP#kUiRd|XsE5us_nj$80!bRwJP+c$*8CSgZP$Iz-LQ8D~{m{CGSe?s-G;oyc1(fY>
zskcfdg9nUMA%(t*IOdR%j(DiZFKifc2a2@UY*b1l4S;;rQ?Q0L0O#(jWFZVuut#)6
z(w2=C7X){Th9(ZjjOVIUl*Id3f4QkZClWElWc*DTrA#e#6bs58=q6u?lOXQurL%O3
zaLvt2u0m_(NS#Zm<nk#k+{T|J&h7<6rcWCTH!1Z~Jj@*8JfP^+y@ar~Pk}6UoN+=y
zJAUDnxgKfhFT-;l#Yipp1&|Dr*%SawlELhNRC0N&x2v$13+*?=iqgewTXqjsLFzre
z*$kN=_emvTGqSN*l&fXD)UqaQ)>ZC@%zoxvWR9rjlr$*JxKMhe0476#RdB+mwjSUi
zj?g^QZewSR%)b%fS6Xjn1PmWEd=R<Ou2R}nj19rNtvpy_9wET)u;<-^;U92i&Q2>`
zrZlretqI`MN1_JE=v?j@BdU6s@==a5Dmf!(a2>#5-FLU`$?ld60P3UUB)1_J6by1P
zLKz{F+2h;eihazFB8C{`aZ%esL~P6`(F}duD-_ZeUOZ6S8Bjjm-$t#yL$t9fgfZha
zSq#jBX<fv4s@;Gzg_;#RzDwek-f)qSfCi|ei+o1sI|=bYPRz0n`R0`nv4UTI;G@Z~
zpEY}@yBlT&x2W@36lnMF8O|!(($~9Xk{ML8w_l27-vKmMaw>e0$nlCGN~Q3uLGoyL
zZGuM5qX)1ntoJtNn+o4wbzuS|8^}=SZ3xL4D=Hpy%^Nt4Y}eWuIp(YK<@N>a_dteo
zUW`RKeWd53^;MqWXWST&JFCA`Jcr;^9DP=K8Bh|Vlj4<ulDwgERk))qWsC)5CUZbb
z7E^OL=f!d=h*YCw{ZOkRbZ(@Ks8h5P-D`BVR<=XCCmq%dNYY!X0hZ5oed&AOZeM0h
zo_NhFjf|P7xj^f@oc=1j!yKn>>^IdJcQ)hkAOYP5tOTCsM>Mq1#XhrX`)O6Tl04H}
z0wrjCR)X><rGy6nf;+6$<CJp<;0mrtg%_-B!3lX9{J+(Ttkb4>8Djn-wYRaQ#+dPu
zla3GOu(8QCy~#&H3YtXek((HxPi;cJ;ybBgkU-2f@<uAFM7wEOl(1jC)ufLTM#{%8
zjvBN@ie_~%TJ?$ybIl?oje?`EH2aY&O6mwWJ=0;7#~UFWnt&V_F~Mvfsz|1hOC~-B
zqg2}2!8xfzs0Dilp$RR*e(Ey4X0{-YVm2or4yzYtgxsL(Cbo9)JI1Iu;))6+pY~3J
za^cTZG>NeooE#1+Nx+UpMgZcYjy=+rcldY}ItfFnGm)Hfd{#?TiA>y_HEy+LKnV-T
z;8rVIR%!X;JyjVO0YytOcO$HEQZ6`NjYB%LOgC_9Mq#vMVEC#6dgtg3t>oUOgvJ=e
zr{qO_J*`~Dr`arvfZzT#^7rUJt!+Iubs)Tq6qAj|#d_Ai>pLxW;uNryG<nIzd5=_Y
z6P)Pu8Wgf9#NBCn+*eMq%kg8-`=hm862noCb<MiAM+X&xyP4p(OSn5xT8~Hij$KY4
zW#TQfvs`Tts6$e=%d@4QOFTa~^i*oTi;O?HVl$5wTGqFgNO#+uFXFvL0vuzr*rr^=
z3miOZLGOy1k4wd$o`Hv7lOGS0%`_cFuQe5bqXG^vMQwF!GWH75kCJmtywM|u<!<cJ
zLk>s+j*4L*2>$>Qdli4D;Qc%vH_=|JGR-A4XY@OQ`m{n%<U0nn)8MuVtc<qpZLWKX
z_gc`)Gz&8Kj2hk3GO;JbcHp->(<TjbYE*D4OQ#;nF&tM%Dk>awM^xL8qjazgIqwG)
zT-n>t0))4aLm5!LoX~o$>mn1-s-8+3tRihT+UOQ=Gg*I7={~?LyswJ3(=Ki$8<F=Z
zqpz)6;@pV`iqELPwK&`)jTkda4Ahb5))x<LKg5u0Q={uJ%PPe)0RI4Wlh>G8U6dWx
zVqHq{HCVlcZclaP^z+Z6%;KM34*c-r)x&e(_PtiZ3;Rv3264fx)widvBE1j1sxLp?
zKhv7TvP7}nHW&{+YfEc&6zi~>^qAHSUfj#HaI$;4bn!lcBSk7z&myPO;@nANz%?!9
zq9g`Yagjo$<Z;8l+)xK~y}B__UFH@h0r;%@HrLAb4Hb0q$OM@tHtL`e7XIodV=lQb
zEo3rTVEt6qA$AbQny0Y1*%KovIIA0AWkA5~08|o7kjRRu*__n}t7lb(qgNrZ)mbF5
zLnCjL79X0(Yj?)x_4g5ga4QF>>SKcoep#<Yf#t}nq+?~cnk&}GInM@(n>SF6=O&EQ
zVTsVV#_Af#Fo^M!UNO5?k5J^A=!Vm3ZjF?2LtbrW$BK(qm)Oge+fnIrq^Y__2kNZY
z(%cC1$tzsR9D6|Gt0uXS&5gW$(bv+#PStUrstGoUKs{!xf=iZwxf?ahpxY5SpoCX)
zzUFhB^;Wlj!i=E7pdo2Q`;(DZLxpJ(NPNy?i)^D9s+YY)pCNZq`j@#gV2}kt4TNf&
zjtyN6saq3pmh#PWa0;%~qpnv8$ib+zl7{<W;}mYW5Cb6K(n?EuL6s%RpVUDycT{t!
z8RE1r63Q`?#a7>{#7M<$!!^(s1uSsRvU`DvA60E(b}}Q`@~Yu*!cXJHSz1j7;CS6y
zGIF^<i>`|}mgu(h;C)fnKJjDkjyzR-GtSZth<4S4)5w<j3(a%#CitT6m6&Pm)<yRu
z8O2iE<{OYoWldhdS>th>fkEmRmdUoS$$B;RdBe%_lCDVva&SjIQ_P`ACmA(emN_r{
zyc*=23=8vBQ4Ix#q4D;gtoCZo8Ob1n$KtZfaIDx1iZ;^fNC?j&iexlxhWTyzc&?T*
zBgqFpifydIXjOq3^F_xX*ujWCDDsF68!3pZbDGZTmc}^@iaRx=!lG{QDjT@soj-?)
zkCGW!*KDfHbDn6cuWnvr01?=!ukF$m{yd*QR5WmvA=UFp4uPkQmjSR(fl~{1NVlHI
z>ZWKzsbYRr465ZuWjt|;mO{fuoBsgpNPpg}#DOGKKYSX`#_^z0k{EYa*6~L)3hO3Y
z>aL8eh<)s2IAC{E<51{dBRJ}sG6%NCkt6v6oet^I2v`v3;&V~G$W<ud^X8A-1i!~T
zQ1c{i#FXldhKhY=$z_PhoSLS`uvJyFTNx4;w`?}vI;^IlXk(bXWS(<XO>@8jZi_L&
z2Q;Q8NYtr0{L|J=<7r=zrQS<ybHyMD;s#XM=ZYR*jF?_R{nNeAZy*|2SK0<?S8}xg
zhUwf(?yF&xX0*0$)2NV<k~~&Xokz+)h&5-SY97?<8x9Rw1_MAW({|&WntDeWLZA*i
zr?kdJI}ZY&NZL)!8xY<_IKz?TQ??`@-77~n)+B&O-7U19*yolM%juqG8&r%?AsQ-)
zKw*+<9dgx#X*wRb#Tc=d8~*^071rurLj>O?Vcka?L@6E#QF0V8apT1ds_~Lpj^Wm7
z#`Z?Bc8Kt!qgAuGW(-@tYN+3kItY-wkYupm=7v&pvl~=-H3h`28}0=2Q$T&>ZS=_i
zZ0v!?7@U1oo`rvL?<^_~!}nFJquo_S;{fGA8o$Y+EsH}o^^>Z2x@0#nBD^?%<>IG>
zkr!bjk&IPtjd0gU3wWvS0ZF$3R)m`277z;ZKI#*>lX$`U%}H<?OMUB)k7A%wOk@*`
zbx?;hG?r}}930kXRE<(hsuwk(&Q#=cOnX$4(n$yM9a70z40%^sw-L2|jVu=oWFQ<5
zx+76`cf%$;6F?|v+XZr@_ej4W3YZzWR{sG0(nh9KXv(O-?xA<vC-~}Ei61vyXNsK=
zb)k5O%VAXaTMai*N3>iW?fI-^c?`_sa(ESc>7yQYsmVPpG+7M?E}2qSJ=0=|jF@kd
zpEV?5qlLqBpZrvI_W~vZJky|rNTLS_az}t_CM;&dj47m^c4<_Roxk~}TuUUQ3NQ^q
z1L{ykDzcte99BN+-`kPY_2V_RkxX`@BP5!j*P@Sll^yp>A{4gJF((HA4Ae5X4q3Sw
zqNn({R6UAe_M^a+Mo8k215vUD(}k{Vvte`n(!vy}ByfCGxm8`Ee<0Keap|znAdQLQ
zwEn?HBw(oj08L~xSD4&J=-2?@*1d~IaE-eyjN+X>0*GdE=N{hA#wvSxil~n{{;C_h
zhI@jpfl2zO+YpX6uO^D<gv~Yv(ZQ;XM&0LE#t8FQAVwS9VZ0hkm|`#)5McdKY=@mo
z7+a=L3Bjq@NhBLoXC3}3*7_(`;Nm@m_SFL*iZdi1B|plmY=wpOouTyfbHe+l0|iA=
zTc-S}B?#<MwlX@tBTR-pW`@+_BI%u5Z{8_Wrbb4t47&?))oV0Kv1%1~E5&4?jYO)<
zIL~!+qh0?1-uqjAKXmJ|8h|%uM+YOiiI|Yw9Ab)%)nQ;xc%%I|wz5HRmxwdB;7vy=
z<RuyhlM5P?#Y+piLh9gP@y!+w?E~a(1pH6phMR1;A-+{E$Wd!>qd}J8v&JfdP=Ie@
zRg~i&b*$7Na~Ui;99CN663!Kn0uMAL*a)Y%nC(yo-=RfCC8I=EQO?>+i&l*Lt$+yl
ziW6v+@=gHmmeC2Jjy#3~Dm>KoFB)ngA{-wyER692u`k8EQF>Yr+ydVdQ8F62E=q)e
z4#y^$CK-t&<$kIaxJdw37$c0*I|Y*gs>H$(YrMkGyRL;nZ!FSE>Qe*mx<(<kt6<XG
zF<r#A4HQDBR<?InW^ah|Sw)c`x?saR@mnod<pcb&_^fjXq-P9%W2zDZB)o5N20l5c
zkxXhAJeqg<6n`U5b#2NA2gL*iJ|>kuC*xMyOM6!+tiYG&HBM4T$zzrsQQATu-ozW0
z{^}wc@^>&m{Eq0RlG)?h01O9J1KWH94~fS$$!;8OR>Kec(Q!j2Ju=Y|l759%>Tx25
z%KWO%HI^%dKg3izq9o0{3?DUIqA{L`k%7)L=BWKkYS7K^s^l=O1e;Rh+jho!q`ieM
ztzH7$<AYhGs3u=PawOi}=dZfZLde_8E;Ctp(V@DL3vAif=B>2*mUP(d&)r##u~;40
zad5<CS2YZrI3Q#iCsSq;Y|cUMqGe#CVE#Pjp2a4b84b9QsU8Ij7-ejq6k^@aG&^z7
z#WCIwZ2|s6zs*EJ)}-7|8z{pmtzec#2y#XTHIcBmRGmN=P}R1I)=BM_PLzy_C@6uz
zX%*j`(BA3&!whgbqNK!3s^>Kg&;n4%ym>TOO1(+LD{lurD;cgxT2a9muSL`?(d|9O
z878n=>U&J+$~SnbvUEgI%pOs^k%3XTI2`7a=5oCn;0l~KHJByNhNajh0nK{1qG5{E
zBIIs0<l0pJ+pf`%&2K#~sVKNY&N4qW=lyRC=Z(?Z^t`c*xjkz0QfS0bZ6yfj6|Vg{
zzO+f*I6`{{vYI5;w^r&{24E?dw_mloQ*Ztwo=EdwKhov>K6KjX_4#14V%m>wQwbSD
z5A{*Wf__G`{+QKm%n=utc2}S7ujF)my8i$Z$gg7@if~d#n{jcKG49c#E*NC@N&86V
zUHAa~imaCU%*nOn3P+66NvcDPK{z0I%_5f|rb%&RrfjGshni$hC5_(4$%((>kF>a1
zZNs-2^GiLyyAfFj<QluO34IWFgDZSbG(Dtj<nQQpQYkBI?Gwm2sg)KuhcvC%0Hd~J
zlhtRvZF2>!hC%r^ioT9vE@M0qS&v-Wi69P7@$p%;39NE!vg`>wiA$od(;B(8lq`&P
zIjzJuaL*UTi)2@t=_r%kMuc%*kE6j9Xb9s1v3gEynK8952ES0p2v6hW#ro>oWXmtM
zDu&iLICfRtTAy6qB(szlJ9w;Ekz;bsf7-bos`qLg9W4j_6q#bvm3#)qJk_3`th(VG
zkO!K~!+Q*~VBlh`qmn0Ijs;`EsLQ97!J0Ga;Wbl6p8o*U(7|MV#x2JHRR)`U+McHr
zVVz?w(eqXEs<Y*}A5?CKG%2gVLV<=A)x+u;H5nrbaC7IQ$A=uGm9x|DtrS_rint2C
zD!o+$9PkBP&2o}ihf+G?sU!i&1F~z=rNTIO$lIF<2=^j5%?=^HRtQ|Gp7B?NW*F2q
zG2*41D2w<4X!cgwNj8RBHki)YBksJ*)Eb-FYoD_jZhNm<7YEvS<UTsDCiOf~Ho9C4
zoYy1O@niP4$#Ql5B2t!3x4=cz%58TLLlHx1_lmhp<k6-e_IM-3Q$n%Y+!EcMtI5S~
zM}2MO18dNmnf<mTDW}=ke$w0GcMMUNY9&u|6e`~+jKspUMsji5in=SsxJ%ZR*pPft
zvWsyRRC7&=7L|*i<Kmfj89{;OoxRVJb-*j`l#rt|uW%rQuF;z1RwHap@-#HCN~}!W
z{wl>!f+WU6Ft}r$DO|GgpN&SrM<a|-R~IrfY)ZL9cS7)4SfYkA2Ehi7xRGavImrW>
z&9Cm1jjnc+$gBH1bNfQT^NL)MQFmiMmtb+nurp(dsMQMlgq#|+F2Q6|!5vj4#B#KZ
zUa!KrwhtwDy$OB11Now0fEh!mpl>Z>-;i+B?LDqT>?DOAYkP`uw8Tx>r-xCB*#gMF
z@kd1>-N%&bs-c1hLA-+1V_|OA{Dbj53g+b6mmQZS$%M0!R5KyYdlcQ~HegpO)f>qg
zAav~b6p&6NY&h%gzS{XdQDp|G40lARA!t^TGNS<Rt8H|~7&ioYs;KS6q_Pq#D^$YC
z7GM*ef2x`rmn(vC^+7TeVx;r<q@89(Q}@*X7q(i1#6C}fo~uD;bquhAuabML1acqs
z6PmKHxoDY6lZ?{_&B$;SS0SnHa)68uXe~ZOnjq?E!x(hN?gdA{O5H-)rpWB~RUsv}
z4Y+)3NF%^J<&RVq{gaSM<c}1rhJIPscfb)uLzMx_@P6t!(N{ay1btK{@RbS_@k#)R
z0G2{<0IBV7KWgd$%I1Yg!HCQslm42FtGZ->7x6_pA*)HExeFj53V3vjAXOHVH?%P(
zO7tq$N5cXJR;^t0L`u_xj4t2JI4a4O98m1+#|pU=NB*2V4;0}BD7hy%A5=Z<io7JR
zIjNalmmqLWOKOh9rgKs9h-0TuVq&T?!@6qW^O5qa-D^)#A(1;B)f}-p{K4j_ssl%3
z<AOP-$k`}|_=<`)kjcC$U<WlTk`TV!hv<D!U>`A3$_M@_@tvIi01`T;BK*}tkWbxp
z(}p1mki&|EG<qHPy&l;Q$W^2=m10zI+#1a2k|nLckl^R;w6RR2pN|b!McN7qP=sX{
zb_X=Ds>qDY20p&4tz-xe#m^j?YDza1=o$ebxGwGyP=VlzA{e%)%9lTicJ9;;aotI4
zbM0^j=EWXp1!lc$v&O6tf-5yR1Z`D5;5e<n#hf&t{Hr^zSuArIBFM?{NdC!1VnFT!
zNsRZJj!dJkwg>{IhutKua(>N4?y}CP!y`QkfC+gEz(OZDp=}xc&9W&2^GDfCqEaxp
z{L~j7;~5(Zw>{J$ZRyL#S=uwl(XA^+ALS|Hu{LZK0|G%_&1mgqd0|#*2^iw6iwZ0h
zVTe*fpSoitWQ5(wJ<`Je0OP^u`>DTe9FTjP@I^o5gl5rw)e9g1_fI5^@Bj!tX|S-0
zPnRPeX{T{xGG_zps)RFo?X2;VO8kE6knSt9KQ98d`nnl3*T~7mV=irFjg~;b)ooKF
zBZ>)3g`46>iYZ)25kiHW`KB&ZhAc}_24xGD115t36}b?m=TdV<>DL7>BbG212Z|Ee
z*{0gj<qzFTR7UQnJalMzB^xzXXa&Q_Fy1N`5j2X(2<Mus(eBz?aRU-?YRV@<tb_S|
z)u9U1U`?^KeVT?+5&&FnY~q$NGsYO4?eR-1r11d5Ezqbz(PC(_#C&?H&1x9rwRH)C
zwPX~-4%~8nsA-dYY>s|ri{vITpnc$n!4!hsq?Eh1R8e<uJZ$kE4tO*x=h_Rn<b734
zlr}h+PlKJcGyedMu)K<lPU9QJJ!C@>;Fcz*Kqt1H;sMiu4Qh0`zj+vxo^gZtta|5Z
zO#c8?f1qzjnip&wWMY~jMZ1C~z&o+#h+VH1<zcvZpis*cnc#{(%)&v(^KPhwVUkkl
zh&%C8%9AEg{{SL7sA8GopX3JMYI$dANpZ`5C^8g!%yKLxnKrTWR0dg0Squ~tjkT!P
z7F(MWoPs*6wB<6mAIG|_qB3;MF>hHuc&1wBS7(*7Y8N}OdsE&h_-*atggP-f9Ok8f
zfnO3}4czrb+gy7Wsg%GpEw{Mxu|)t6CZiL#?y|tK27>G++6LZt@&-DiE%N(IAC7!g
zc9ki(Sfd~*Z!}`SZRIHPX9pt`>>+ouLO_gvwtP^If;j<Wk>?c@$K0FbaZ)QQVm|)>
zH4*_0+_GE-iZ#H;b&tHXk~!R$AZE7bVn{Q=JXU8|VlP@Uxdi#8lO+{oNwBlaxd&Cy
zOB`IX=QL}Wt)jywQ2o#|Iw{_%ka`rh0R++e`=2{S5d^Wxc8G@?s=nYf!!e=!;+bSs
zE|iuzQ$U8TK=;CDBzdBkqc+evp&)ozE9B&gDIFA!GFq%mVPjB}0{I}+QkDJBWGFOG
zy00)yaJ<x4NTxu=c%pzosk9PWJ1-kWU~UA7I}y%L6}i;m#hip=s>aGoHe6r>;-r`u
z=!*~)jt^wg$?h{LLz1+Z7{EEtRVA|RFZVzM*55p=g9LL=vy<7rMq+#5P^RPBmm`zr
zriM{9(q%jIM26+&6GFRRp-&Qm5fhAdYP#MvF)K$P$n0i|vXCGw${RdWgr4@`Gps-o
z-*p?37Y*mSdlV{I4u0tuY>ABWJemy*nL(}%)2nUUii#_UiCzdJ{{S>^sUUPJ<lt2U
zPE7oO6m?Wd1b?V(5_K0buHU>>C9I_-Dy^JWp58+|C0vb%iq7iREopLOk2@QXYMz-I
zBQvp(lBJLT0EH1|psDwBhU$W|m6le{4(d(v7U7Not1iSXAcF;gaG-OKy5?BqTnPSE
z1+$NP{EWmA-{z$y;gm9vN1B!rNNtpDayvDy)8=Dps#xF*3d~6H4{r^VoRL~vK#3X^
z&&hLxO%frqNDPDH$or-`wC%|Y^-_Ffenv4-yKp!lgG|^#^*LJJFr?&Sv0k83?Ys=N
zYqdmqiQT{))=OG#taHUzK8R;sS?2mR6rd?Pj#{U=EE_5gKImj4eqS}2geu6puu#Ba
zu5@*Abf8Fg7V9-nZZRfP%~@&9>}m-sHyQI(XN#6MMf!OqHD|l}5rlA}xhEr<wAVy)
zN<D+ftiPl$vZz#orE2KgT%?aW9I5bY<yy!%=WX_V2S-i_mcvoJ(`N;k?5|k#Zm|xl
zrtJ(1D8a8Dl4EkFIR+S2kEJymodWDi3m91Zg>KR6UM0)_0KuPEfn@1&Z}tBGPu(={
zHL+$>;8iW2k*Zr=FTNjRANZ}-i|QRaP=!o#1!6f3R_u{V0m?Y>TYpW4Q}-gwKd8wF
z>}aU6j#RS{w_~GL1*cfU703dgEDemQ01v8B?y}&X1#}Zq<i^0>RDbl4*{JSrE+GRe
z@uF@cYz9;uR}Iq;Bwnei`D8eRc+`Q1#%l%ogQ-gd>uwwo*{?vBExSq)lgBlW^%tc7
z0DGu@;(7ijz%`pyk0jvZA0_%(LKzdEMF*!OS*~Mf#(wc$i7e6S`ddvS0rAf@S?TXj
z+gX+NmmqdcX8!<Hb+(Ql*jwiVyj2=}D@BFdE}Q*cWTI7Sck6L=B-`={;<J~nEX>Uj
z=jx);CT2Tk>+?mlcw*_muQ!ps?o#cZf*9kCB$y77VR9vDZX3^3?QY`k+F1VpY#Eo%
zD&8kwYq6RhLS3*^B9q;2R?%lRPp7Q+7U=Bw$mAL}+T6u-jHH&&iXO=%$XLN7_!X-3
z{*y4AO>m>|D%~y&xw5T`gC?syF~Qq3mW<!AiAgyt%})i4(h%{tBgHZZ+uX(~&fnaK
zd<d^=6-tjR-;vhVvojMf3<WZI7BEsngI#$o(h$tRaa7lhB=T-AoOeV{!S!iM!!(Nc
z0C8S<bjdvJGaryOto32rr7(^{8HOv)<&J2fV$OQ553N1hV`q2Kqe@aV7Pqmw`+LS{
zEl1l&4&ybS(XHichLz=E&jPG9S!GlT6t!?(B^gp@UBaD~f_PfnDmoR8)o!jD``d~l
z0X^0JlEL6(A;BIh2QjU|a)64v3s~DtSyZD^v_TB>!)i;MkUFQ>O!tai#t5fRBt;1$
zih|yMX^27bG|!dL(TjYNg=l9o14G^18D$BaX1ea_$11tciVjBOdwfkaA?1O-LS}^#
zMpzus(Zv{>IO2w#=aY!@kGdulDF`p!TeYRjE9jQ@+f@Gm80=K~Wy7Q!AJ0`;H0r!(
zH7Z;^#>9ns^FhMmI($(*(vv^dXdOr>Ak~D$*9FH_SE;yo_X>Pmv@5jeqPs^o8^Pk0
zZuYQEoYzk&LAZ>HM0<Dn?)g_mD_o#jOtG?xd1ew8!1}9OqjW9sje}HH_wvU0jR_ys
zXf&yo@o_K2);!UaeoYqL@OktKhGCOWbumazz`)f?OCiLBAL^_jkjMt?jD6SI7bnU<
zwPZq!`*2THQ)e?rG4~G!udZW9hE`+ORh%F>CvX@&R%xna9V079#ZNh|d890XxdM;?
ztCQxD-Y~!ez@#AbDhVt*q8W^FkE6{EA~Rx8hh;kFX+>pjG03Pw{{Te2c2bGJ6`{41
zo=bP)v3{Qa0N3~uGg@eOqn-Skz9}?B%UNcLR4TR)n&gpPfG?10vPn2Ra%pWundBX`
zt^-*8Qp+2pL6KR|K$xox5Nl%Xy#@wU4rmGV_&!r02R<sMoJrY1HrWTm;s=Uji6c>i
zocN;J*q~xYG1Uw1W=t}lp+Nwa-sP2jqB6&?#ah`*Adp8R?jot2GCpyW;EI<|c1xY|
zu@upCz-eVK8t+lXC8fxffRJ(I)b@%lU@adjij5)Iq;L*u<boUR+BW|H^X))56bekJ
z<VG?Nnsvk~xCHQNQUzF!f7sMPA-RmRWrkFW&1yF0)^-dyZmUfTNWqYif%iaaRz~I*
zE`CeVq?#c`YSIS6jA#D<BByp#og7D$Q}Ptm4+XJgBXBt8ooO7`NXocec1>0SZC3Uh
zsZv-W5U3cy^FqE0WR3Y5%_7C^NrvPp=8U#znHy^HLXrS1aJvo4a7T)~(B@}fW=6&;
zn141qu%eH)cJ_pdF|_yhO&L)QWr~QQXz`HVX=8yyZX$rOxM@{Oa0sg{K2^gjjDhB@
zzaby&qhyBofz3&3kOtg<9y+B#8*VCaF+$4T+YN^Xf(Uy=tr+;@6-%vKmw@d!#azQU
zXixs^P%RtetMaW;q8Qm&uk!fd<1~nE!ws#L9%|cH)5p7oAyJQ-0_N5$R|zN0aoW;Y
zq&pNj&rRl?Gi`vN8BtA{VvvoGjD6GXmE0Jz2tH_#h)fnNHvs<t=B@odJI^qMRP7wq
zQ-ZtW36KW{h|un3L$u*}?uvEDO@y}W6f&IBq5l9D3B@|qnn*q^o+wvoH!>d;Y9Q%b
z+aSrxnj*^JIy7iM$L68-eam7A+~58bmM9}uRyZ_BPb!Jjs*ERCtj@D!s7sTy1Hi2W
z*-zl~4!aXcW$tY(--<~fnSYrl3Gwi0BW;a|S2?0yE7;=%M0OsH4xq^USaV6(0)t3m
z%wwa#u98Ut?s3VcBFYq^98;p$*jEIwr~s=gk8GrojAY=7(Zr#GnAe6GHJA+6@&|A>
z_!XqmE@PS;^ggMg!b%okek6c9{L}rnVv4Jbp6IbKKoKL|iW*y&cgrr^9x+ekBd!5Q
zDZ6nVYA4!TBy-@?B|hOSNd(eDgWO+rum@ELP<oCT7T^@*8mhO7LRhW|`mK(s4ZXxt
zHsGaK&12jMw-qFl^;J7elr|+<S@)a~Pn9E$0;}Yj3Ibl^Y<bBvT(-^bu3LBBJX9eg
zBM5R+<K0<l7t#B{8GGY3S#cpi6*5IW$;i1!d|)1h1Vd=?ZAE4{2d~XO2!GA-85Gc3
zV-a^A-rS0?voS}xl0^iMH1}ML3w;u#iszj7O|_TWk&q{Rij^W~3}rdV;L|SHf+LKs
zK&2ri(u=!&s5UV1Sq(zXe|TVw{{WEv*237dXpTRXW_>{`O?uN6_S>#0Ss7ggtdJH)
zP;uF=NZNaqi5n}L=;vUps`+elnoAJSgDaj-R1he$y|aqm6<B57-|7`z9%E*c+sbB|
zvpkbDobB^mE;fU)au0)1eo02FMj+Xw*yVhERg$Qb2k7@%?@vzV7m3C*;;mttL}g6;
zGtD<N5@F1bxpI6{qdPy--<m@tSzv(jYDosv;QYR-SV6C3{{Skm=N>AXRgqg!jz<|>
zbypVykN*G~qSg-l1i^2*Sv~*-s7lOE?0bF%4ZNFgaE`bFqqq*EYm<>uN44$3FMZN@
zAqw0`hvIhTn-oQ-1y>ACN&BW;ar_2x-4~_@o_NU|1IQG~0J&JqcI&uuXt*~jj4nv?
zM+mK2+!s@jI28nP6+3p-h$A9WuF^=@C#s4`B$dQFu)WgWIAe&7Exof~I|2#fp$2MW
zZl5RmfE*gay0CIxxF?F=>k%?((gFN2&0wWLxT@rWXf`Vu1F)Y1kSLKMB(OdBs2U?0
z?nBKJrNU*DEUXkBsvrq*Xi)<n^>dn%!B6hv1o2C%ZMV1($tLy3V!G9XOo#Z4d}!GX
zf9)dus4+LmhussWA%-#>WpUkALnMaXMovd`L^l$~fkDqXIja!Gt1L~mRk<THAGYFC
zxv&jLjslraekqa2@|GlZ)kZ{C`jy&90ujP=e(NhNyUsC#TJ2ne7F=f)n4T=kW!v!a
zRY-)EGGh^zz@u*Mjor4v&^1*NKIY|I0ry7PUCc{tVYGHCnW2+2SGc#hc#4sO-8R-;
z&ymvAr>NjoQE-tH{{XtFt&xE&pYACW13~6zBm^82%@sZXjf-(YTFVkgx<w`PPcsP#
z1y0`-_5gt*`?2jFd#!Xbdy&T)pO)V19TO~*#;wAGTT5B(mLvf(pkwn)nGr9!?L~%M
zw}bUejv4M|0T*yI>86Kn)Es_ldux+si!5kK2O#*U*h>542{DD2<U6dVsUwCqA(tfe
z_^m#%a@R8%_*ZW0JFUiE*vLpbeA3IVNMmMlue5SAP?EAS1YprOMMv?$p$1F$iq675
z2E*`rs~j2BW5rd16!BM<pZ&K3B#MYA(0VsrlG=5)C!>L0&!x=QI%s3r9aqnOm{K*6
z&G%m6p<isX8SpE?f0F5=42n=(p1;wv%6RHbhIzf4qA})<Th*R0gbpeRqlR+Q6UnOR
zy~MwcB*_fe&3X7kfA)8BQM&Z)yq7Go#kqdZ)p~Z76G3c<Sg}#vQF>a&7;R#W0NlV5
zYg-g5iM2*Z9M_@I=_7(1_Ra>UKf5o%8I{9#{?#dBKsf&ZYKBPyle-zJ&0k!$)2*50
zkwXrFHKo*fW_53bTFG>=<9hL3?xUQk@2a5DwKddnAYelds`|t%fX3^BD#$xlh|^t?
zF$|w0%>yiG_ai4ND7))Iuro$ZK(8>@y>|LF+>ktEvYcj<CRL4cJ}gqo>L;aM+(Bb#
z^A`%CYsGy>s0~9=l=e3qRd1;Me}Aa1(n`X73Ik8Sx4S^=Al>G<I_%QP8cTz#q|(b1
z-7dppjlIxIV;(5wn=7*wOLs9@)s?uTW01OKc{Rt9zFE|qcuZ~O;y}QAn|Z4}9$|Fp
z8j|eaim8^$E42mx0FX7d^ahg~IFu(T)m?{A9P&{`S@Wzon`VbZ(W8PTa=DcM0OGV3
zk$Z249M@Q`B_YNQ7cf)=r^wf-!-5!Nwg~fCGD{@4Nu4V~i^1lZEyBkUBOl&sy7J~L
ze7x=heO60e)(rSdc?cQjnwhl_!mex>@XsUoqW=I>)?KGqRF(&f{Z#(Mi4b_(c1JWW
zi){?Hk%>Shhw`f?x45+!Q@TGC>E9wa(USER{CwMws>$9dkxn@kv-L~_TfFbk73Wjx
zEBl>@*{?b3*rxODUq_(ysV9zuH2A_f5-tadE^rxn992E`s<0{&hcsk&W=I5^NCa0Y
z<C6I^qm$*xAy-Sxp~4zlwotqtsXuU@{wRHET)^JR!C_a$T$Pkhbx{m=amgpPa6s;i
zEefX?tp1Iq+(uei<Sa?fD4F4y&ZS=nI~8=XLv<!L_SFY71a0U2tDPgBEzQC3Drh2&
z;L9A5LeGWZ915=aqz6FHC$?~-JoQCOC%JEpxHzbxXd(reWS=IPE$k(>sT|dv^o5cG
zOLl2k9%$P|7S6dnnvfzaVnl3{L&{o5P&)>l$;*I8veQ#d@UA?Hr;-3vkHuP<cS0h6
z<ONpCl_w^X9j*xYWi`#X!9hU9XBthM{{XdEUdy$PJ2hOeO)UGj;cKOhy6h5bvc?(X
zM-lB6a|>HV2e=Lo-BwsxN4Om1)wqEIY>{zUb2zs{siVr^UCIkEU&UCY(6m_F@)c6>
zvM|h<6if$eWe!d&?CGB@C6$%Yqg6R!S;?bRnS*3=S3+6E;K<^tb!BF|FVOQ;WJCuN
zVB?Z0vJgNZVxFWEfVrnhv<wu*Ap-3tSuIQ?@;n-XGq-Rnnu$REYH-xMosLh61Psx`
zo7KZ@6{}WcUDAJ5jY3t!hr!Ki^w4fCLrErZI;*kt4uSz2fj(%2SK1gJC{pSU)8KJa
zMRkb65sX!}z(VZi{!q3se9;jqC&U1ARpLb1l7t_+mN^3eL-R`2kunW!S_R6kYNp%v
z^C1pMY}MAJUR!c8@&lUAnN}t!JM&rP8e=vYI>!_^2Y2S54XlREr3tx5W`zUYDy6W$
zbZT8m1V-Il7*BMq0Fy+&lHL8C$6>3TqF<Oj)@u9PjIXeA2+dk(SFJLLWL`%`t%I^%
z6SF%I81!hEm75Gt#CWGo4jo$=2Bw8U=fJ135lL@xa&c>J?YF(6l4vA`5BrGO50JvI
zJuT`3q+UPx7C?LvQ2Mu9U0!nH%t8lPq`7XY;3&cBkUUIAFpP6ULRlgvIS3zBaj8lt
zwkfmZzG{G}#~II>R;X4JI~#&H3xU%4q=pGY202s|mTs|381g)dGkaY{Cf35|fl9&#
z;T7K}Ip|dBbt1>@Y~rGl+{A#pe2+%C3jxby_%#S$=+l-!t~v2numwN~BL}=zLe5u-
z)rsKuTPqcYTQM?V;CzN^#K?zC;yzGhRB!<!nIDz<`l(Q?t^%$TtW(w`%PO~2#2skX
z`}Zz#JXDJqkeLq{@j%4Q9OIufV{Rbf`|(hPZnFwc8QYLon$65wL)KJqYh`wkBmjr`
z9tf<?sK2x&0_S}rRme)#g?+#m9Mp2JvfwO-803m|z$tIIou@rh3a7TOyMx_9N$qCe
z8%KbD`5x%yg4wKus^ClxHj1fTs@Yb-G-4{GKLU4lO&SS(>FceMG;%-mD#BSKG92&^
zn#}2u%C{n7PQ%%)6SPP$(eA*_Se=mtp^g$6NjVfP&gPrl0}6ao)!ooG%mLL*c1ai;
zfFs2eLAJLsM#M`HAdWLoUq8JI#~ugON-bn%F6SyK(Ti}IUz2ocX#~$+O*ie5B7cYe
zYN@0Nxq|Y3>qD<wG6j}3<H)L;-`mNjt8W5Aaki_fU@PHy3xXRX$fA+(<b~tx)DbG%
zz*zF%Rl;VO3MzBpQ6W5m%$teczg20X2@Jy+3Vc-@B&38$+Xx*|dR^l|e55`@dHSfk
zA+uTbFjg4FHE{cZ9&!x`*DEN%8;}0#RJg+IegN}U1an)uF=ja#;+Yl7e&C50E!GVc
z45>0^Gn`d>!r#5zd{hYg=)J}wG07cPPg1v<-d4x)CbtUel6<?{S-()J6K)H*9!)H|
zAx8=rcS=VjRO%#E6Xf9bDI>TH`#rtDc%mhlBq<P&YMwAEvIF~~R>FV)siS?o>=_ix
zi&1Yl*u>}YMJxs1DtRNSmWf7>)At{^x5SIc7!*q@M#YE(4;4|QCBzO8$^8nrD$agc
zKXmEZ7_hXOJBx2=IFs{DxU=?MS-ErKy4oBZ#*7c%D#P}da|JsIAaHS2Jc>R?SmgRc
zRUi<ejC+Tn^;!P_REX@@F^OEYX@-k^FbpFhzU!}jFL!@!vRL-8=+zU|VO_}<EjCxw
zS?5v71P;>}feg6oqd^~&$FP!f)orEp7OOO7A;1TUZNKT(s*Vo)5!F-r{W|_dpP|qH
z023Tlxj+h%dZ#o^?S?oTO>JcTKGl7|!SY2c->2G?i_UXH{YOv#03iN@Km1I-pE)-w
zqm1!dKvf`BBa(S0y3^2_tTU<Dcs<p%&!%-V7q{(_e(K2Uus`I5dOXYcl_iv&k&LkO
zO?M^NY2|y(TSuYZ*`k91EIieIp=WU$xi;*i`Ndrv_~q1!o*Ayt=_Y}qBLI>sGpr`b
z)TzciRod);D!3(&d{r-~L0Qnmat%9Wbjs#)J0EcXJ}MbvnmGc61CGTLG+`Kg`5co_
z++7#Ks`dV_x?2GQjtVfwdm@a|RIS&<@H?n2)VVv3SIrrxvZe3EHzhg&sJCdz3g8|y
zOrIN$(eeAJEag!layX~YHW`*B%C};yq7b4ivw)*-=BKzx5O7Z%b5O_bhf%`d=8CzS
zF$l@Y?vG?Ebw6=*=SOz=c&v*mf=8O$>dPDHk~_Y}QSl#jh(rGXbF`c>^Fj6oHR2n0
zGjpB=J935$+m{_;h>igBknO_xu4y7Cl?N06S6qgX@yYX(MCrmqGKU$*Sf^YRJSU$u
zE}l#?5A_p5?1qleZ%{fkbT+};u0;h5ax7~v-B`ygfmA2Np6c7+BX0z05afb5;++Bw
zgAIyF&@&9Se#La4fE5(&YJ>ySwxkjynB;J4DLa&91e|AqT3u*^7C9_DR(@6YND}-&
z>XJYiJZ=hY=XZ*U;dq<eRx9|UA|Y6e;{)PoCYhv|qkN8ds@e)+wvQdu5?ji7HJrYU
ztZ!N@9lv#Qi1ADZMER$Bg3R363xyeF9Muv-l_3%04V4+{qmVpdm53PXtD56x03Cng
zrCXiutjBlQsTL@8v+gKVXPVYoyp3$C6R$Ot(wPpRp!^1NirF-xJ7QwTHEdZ4F44bk
ztcN7`Ok0W8AdY@D3^Pa<4ZvzktjUm#z#P*M66!{6#ZGZl_tMDlG5~PZcI7{6cCS4X
zRaY+@F%^`2Tg@!GA(Fa{mCED4G$e{Z?~zwG1sFC4SHY^%ieYj`Ijrm;&A2c-c=1=d
zM9Lv;+?;SKoc{n2Nyr{3{XK*HkYg1Q0`&W}k5gA+x1MX(JuP_+%xa$CBhNMBcW)q_
zoC?|golP^&GUNb$>(Bc3iDty-eI36?)jWANS7)s27Rep3F_H-$Dh&=WW{ZEw=d(r5
zT1x@v0-NbOOPifheh1nIlU^H37bz&Ov)1L8`)*A2&rM{Az#Rcvjr##3gXXC;7z|_;
z=nA&FBN!(c73(PTjK=<;dYQCszS3`ZAMsvKs%w|pl&td>IXn-l(0-@fPP&QR%BkY9
z(Osr;c>e%u;p+0rQ*rQ)){g>=6swkcPwDO3$7HPe{#dV4wRC-wA?#O*`WF3USAej<
z`B$xacI73sQ=H<u8cFWNtmo?Cw<9l~<rxHaE5`o-SA8(|PHwFeka+{mdhC;Kiyu{<
z)uZ=c<@{>d{G+v*@lE5F<b3ZnxsKVOmMk#inv2u+$rCmQn)F{^dU@pk0J^qLPk<{G
zWn+72{{ZHNfG50HBR(nPPA8$$>LEC8@=JYg88@jT6)u$dO`b<)rzyQZ7^v+PW00cv
zT-iU38Y4EtaqTuon&0{s>M?9euns&|oZY)F3F8#HUb`iY@)*fH@+$2PPirEPms5@A
z#@CbHZKR5H-bNaTKr@}A6`1t)s+VvqvGQ@s)%+KcKgT#V>M`Vum7~wfEjndqs%#eO
z)wU_$_g-mtZ*c_1G7kp5nSgkhV|HuLy=QLL(iyhkdE&X=sit`$?nkt4&}Z2#kETD<
zckeW76C4%bR?f^xBtIq$E6KF2M8|X;K?+U{dPk@414h18g*T__ee7!cLTfbs-2VU!
zlT_11?YqMu0Z6Yb)crELW%QDeSxMtH?TZ+f_?n^hCZ3a9NEtJV(W}!e@TBRLX>~6I
z{{RPzYAG8?p^#uzg}ghhwhpnyH&uxR)5XzKNog=d+FWM5l;b7weh*UQoD)S&-K>z@
zZwyGy6%z?i1IQFi<z!Qu>o}9zJ4R`ot#VZ4=nHs_j~rBN49$cT?QT&FBn8Q!^vP!0
z4mhh+TaM%;AJ}A(v{B#_pEX@Iw5&@R2>emWe6mQ<xaDb=8{Fko;*4$#Jg^UDP6Gm0
z?xu<hc5tCae(G6faUM3;S=+P!0F{n0(5G^BY>a~W*4=iYCW1@p5rYKDt2>C%&JQ&l
zcG0IS2jZF%jdCNhZ9Q7%LoAK8zN#Ha07zcNOKCDkI2oz+6pSBp9g0h-k-iL-*laVA
z%~d|3GB>t}b6RWETyMt}Qo`NeBn*n_VUn&;CDGb-{P4=9`3A3leLUoBT#BG*i^x5p
z)m+(0G;x8ms;e|&`ABG<b998pRY4f2VVR=?YLaOCg_J1_20rLGcLq6Jiu&{6pDtv`
z;m9s<)DE(tSxTIWu#iBgkDRxfg8C$AH*F{CmTJ|IsEvqyz+#^xG=nD@sP~j&S%0dM
z(UaZ{C-L$sk`Sq7;PFx+E8jKO%^S^#&K8n9s0-ao0U>O#z~FeTK8DQJ#kP{5a%(hp
z1`VF-_e7ppm)cA+KZ>&x<b|EINhHOKjCW6H(c~vQEko>p`$5`IJXbDN_|N9A2x?9p
z{iaOu#wqsEDRuxJDjQ@eDQ(%~JkooztgJ@_{Zwd#rct?qgOS)Zp1z3w>D!=dL#LE|
z!)a+7x|zK@E$rDp6h3O1q8Thc--$eVschYxGK`<GUWKK4Hr*pC<)1%_xU}?blxHgt
z$^267lj#$K%xua_xnvmI)jrV46~L8Sqt$xMUXif}KEmYD5qeI=PZr6VWs4_-4bl5B
zCRUOaAQCD_Wpq!3WRE!&=&^c!*%u3LfK;*QP<VwY);yo_CiwlHZS3y<03if7)jUSh
zuGT_6xUWlH2GsIkN|7`xZ2ti0tsiSA{7Jq)XOO*|zcuz4l76bUQPFOq3nXwk-Ppx@
zn&{Ts@$D{us+n|4S2%drAJs`<%KQ`DkJ<C=Iz{W`t*n^-sI<}bDAQ_MNrFDB-j0iE
zi@9T5e^tw;Ly_?x@j+>FzX10lcq(XGw76HblONSczL?d6f%|tW{a3!lrNrlqf5k|2
z*n!4J{89ZbP<xR)A}>v9#1Z><$Msg4ZiTGI3~Y=L_^)LaF^&iS0EIvH5wG&#{{U20
zmntW@6V0vk8_#GqFv65**Q&&Y?!6>g#0NCxvyZXWI@0AQyA#YVw2R^c2*E#rPqflz
z2+urj8ypJs;bRFs*Bw3v;~A**xhS6MJbvXG8Hzk)<m7QxI^<$RS3&a~=Dnu#)3%eb
zV2hf@`lHeEtWNge=kZlf6ymu<sl}dEda?_AwhI%CQ1<a1$|U7}s@m=~wRX0;MiK+r
z6awJQ<S6Q@L}LQ1cLU&diYI(-<VV2ARLMCC%rVU|j8M$487DOYSVwyz*%-Ek=g)Pa
z^z^oq;qFnGV+0Dw?75yo=@}tv*G9OQJ0M-H^-U5QTZN4a2)IIc&x#fOzjP3Pyi?H{
zTYaGAk97!8hud%DJk@~?Vg4bpsv&F@fkVmcS1>U`Gu26^g_#|hut!;-Lbp~~3{nO?
zbSo!#m~eW<Z7xtw{?6l*=Cazwv%z-B{FDx<BtQn3N=lvp9n<Y1RgI>1z#R`Y4-9><
zq&9l4VnxTbjt6uA6?VBhqFum>85ThqDn>!6qPQ^3$k@XSe^ip~NmQpR=739mDn}@x
zoMchAg|e!7C+ewm_d}_uciS1QJn*W8kp}hgRwuy_>qPc#S%ASk)D4q#7-ZDRCiNpf
z?MTE%z?Gsk5|qpC2RwCEI?^)`cnmyMhq?@#_hb5@?>5dD50eo|CKQaP?a7$Pt%@xI
zK_oL|J1b<5y0X&!HLGfOJ7R#M6WJK8@2CF&PVH^V%Y7kt9PRO3-AAb4!^3vXxII5E
zUTTqqx)&PF(zU&*AAl<BOZ0xPceUAluk2T<SZMY(#0z^=2kZ*Ax3HOx0{;NwxS9QO
zf5B4xooqgj59I9gy))2qB&#jN;fU}nOKs`91UY0ZYhQ1pvmc)nd|Ffy<2;(><n@fw
z{!HlO^vqHICQ{!@w^NAVAMr-Vr;s>+WBRR1TS9~57^f&qU?>%edfC3&sybLBJ8!4H
z?~-6ttEI|G@GC$Nc*S!8_f)+3D6wLJOBRsCXMIT?mC4-oYybzm3U|k5F_TEMO^vcY
zML+#B2l-NsGb6gO$>@qza;L1-f2p1JH?<$4v}pkIx|(#rKQ(9=m8C2^3UAa;!w0n=
zqM+%@+!bCv=xdD@Cj>M|$NH@VssIdp(_<=Blie8V^CwM;^!VDkF;~8lvzshn`5&QK
z4SV$U+5PKNf;<}bR0ACOq>?E5Dh+PI>iUd+Ou2nK4o?Tm-k|jMsb?z*A!5H^RVzYc
zk8c}=eOG^^SzV!7Vpa$2Ru9#Grgql>r?LSY^MPIMC$GYMWK?nWK8q;xjh-OMm^tG=
zRP8yF-vJt~Jyq$=M@N_ZkhxLh4l0_)6#d$j&hM)9SaQQ6xU+|s4Dq^>qeU*oR1x{^
zogg3$hx?{S?U;%m<x;Xm>;t@pttpiP!b99_W;rz_(b;r}K^WsT82HX4QJhrl=0m;L
zKUFLQexi?Vo?CD{c&uf@`{a9L1%9hntjMQMb=-G-8q1`jCm@UxJWya_CghI=veeX!
zXDr>**Jeu=IjHvTVsr5nY*7#STp-8C%@=I(o3u<|4$TV60{e;gM(J>@*J+%CK!%#k
zV5@EgeyG+{G6zx@2BCrtyF_?D-8w*0P`TsrOfW+2VgjfyPGc&#R*#Y5hC?BnbbJtM
zc6)<#j(%CFLbqB%VtEyC!^LJ=LdlaE&U>w{xd;|+%D}lDR&Lf<N{f4y3J;OpESMQx
zorgSRic5G0xYKS>anY$}hupYfA0SjLS(vB*V~P)u0$ei*K`d(PN0DT<Ye`0T3Z-;?
z<#y!fgGOnk<GxtQ<kfPGj+vMBE}nGVLH2{~vCUT7xROzd^iNRO$A4(*KrW*>uQ!kw
zXLKADHJ_$4Ma8?Maw8xm^UY~(ZdyPA2dc$cPlrxUbIok*W{yaLfCFQ*R>jbvimfOd
zo<6Bno+QHZ3Bj(jxJaXQY-IFny~SI0-hZQ3Sq3DE!J=h6^Hkc?1x62yj)flt`%4zd
zB>YWQYhb_0hG|&}U;&7;9w-@HF)9ZcqU8gjpX!5*Vp2^~!bK?Cj&nunt%5}eKqZTD
z^+oALOJMg(K&zNUvF-yOb$jSpRx6^Q;}ue@8+bf@RJv8y?C!S}AQQo?n*47j2}K(;
zSr$C1O!ic}m93E@1pU_g&^Dmh$u<Bv>b%#|SLt;can38%KTlRk<dP6Dxv!sTVDW0U
zw0j*!MLj>on~fxoV6U2Pv2vEsdyd!(-77@)$9j*5?x_7$e{D9OA{PMQ@J)B8DvM_#
zG2GHT<JDS(7y6WskWe2rn7T`Oc!&@7qP2k=IgT^g@m29yI&x(oSDJF&CpP;%CMa!+
zL3T39r!bN>!L7%oJ!G&E6#OakSOaY<NHD~xsIHnwqzVTWb{z7?=`yU^c;)Q(>pxIi
z+{QhrRf+u6=6I!HlapQxVW{8R9q_vhc&&DY>(d!qd$>mL{ngONsf_#J=4H`NJg!eg
z)ZPzoX;4qq2OWj2)Lz`MrVpG|d>X7ei>|;%dnT_fcJ0Crhjpzq(UWV5=YF=)mrS}1
zY<>(gipaF)JAgntt9R@9_g<m~&IeV9)S@`|BOf7LEo5S|w2|J?P5WzcF%n2}e2OB@
z(jwcPM&V7<cQhN^Wa6A;XxwCRT=_P*Mzu}Rd!^sLoie;{fu2Qr7N2_s<OwVip_d%3
zc{rV9lrGVVy3=NRci#ksNcuISNv@We)8!d8S~ZbwTr<@zZ^k{jS8G&SzMUQQzs*Ae
zD=(z#?|4KraUkgZ)~8FoB}6k0YtmrP4Dh)0aPnf7DO2q8{clGadoxCvQhW+G(t3e}
z7^XP?0Na|{>l$o#F%T3Sit}GkSYB9P1&vNK#d9?pd1&%`*q0toh77Ay4{syq!Rb~P
zsUrMbW}kN|$rD5XW;|D)S>4SLUDC4WyjHVFxfoJT;zxDR(`uofHxm|ZJI3jred_%d
zNHrFff!g0yhq;RGJwz;QeX@MlzU#Jd>NdtOpt<v2SL&S>^G~@_(Vdy>3g>II(uCef
zt4!;9eyK?;b9{gmc<r|^$;AO>ddWFnM&stMqKr#!1~M^KH<o*446BS9;=)PVXG3yM
zj=hLY@@{GvkVd!znmp|m{P&8knr)6SMLccrRd(qOWddA;!Ktn%3kDT=#ZLo9@s@mM
zg^_*dAx{+(x5)0g09pOkE3}S#saf4uY*}yWh!IAIW(PiMMTKBe%5$2v$|AC<R^Ut-
z`ud|T9a=6%IHiI_Gm!rP?NDkvoE1(e-sXjn63rSp+<6pScLIBJkbZ>)6_W$LUTV_T
zG?o&I03xl)wE<NXH|J~wo`q-d8coW4){jy~F|<ThX{;p^60#h9R?R(EEJ->HOQ9GW
zS0sH^wv~1+4%Qogx~5W=>Nfyt*Gy<+8}YQz%@wq0yE0dZUMwfbP!2Kh0|u@fv0O)i
zRhO*6kH`J%>#{yzS8gU|M?6uHGaGl0hU1D6$YGEGK4`r-D*_4Tm2!YkT!I{_K4}s~
z0|7Z6XuVD~j%F${b3uj*gtFs3R(HTokR*pHG51e`K$4C#QNp`{!wl2Vh2V^u36PrO
zYpDt`M(A>wWKs{Fs<(a{9N^J5Gb&sSq+nA)(<4T_Z7#AN8j@)WxMLnOLABFKYz{C-
zMx=^qb^!Q4S*<t80tLbnLmp}I2Q7i;%~m|j%nr~P8qxY=Ou5x=R0Se2*`;l_wmp;k
zEHn)|Nn`>g-cN0c-D!Fxaj0pI=K7;FjW!z#Wk{4~$)m#UeppuAT2#50ETVZ|NJ!v{
zHTb9)ss0!Q0a1QUDZm41&1+CgqOHV(8*)3RA<jiLa0W+o^~OQ*=9`KD=(2YVQ<Rne
z08KQo-SMeVfytqC5i^Crr4Ge$PcM_ofDJ0fPbfYkvr_1TP~!}H6*_^qO8)@F_^4+i
za0`lQ-M0m}skd-0iJX9Q;GU^C@l00v1K6kIb8hH;QQJhjA09%I8x*^@9I>d#>jtD;
zE;ouFkczE=no`XpZevYs6Y)G#qf$n5{?z<-0G+XWAnwb3R6)D}#We8PsSu7j#UMt5
zgVjnHf*Z{wz!n4wMe<DxvQa2v$8a1{Nwz{4Xg_qsEzT+5#N&_di;WV@&#OHjr|U5`
z#8{`t@)hIWqxAlvq}&@<jNl)O{nya@a>ECjtMxvI2D4!$yov;v_^|G=YBb5Kk7cmQ
z;h#BNJ1*3A%lS1t!YGw@E=Du{^<(N?27MAnNx?6kGu|qpPrDY@IDZ+gax?|Sdntr_
zKqy}S05u2i++D8Ij($d>bde!3hW^bBZEo()$gFnuXz-no-dNl(vaFnvIi)<vvg5wR
zReEybHh_oD<sDVjVOkR=Mn0<B3C||{mp>9PDlOqg0mdmVBUXi4GJB>8PqnnCM;V~V
zOYWm&qV8`7sI|8%b|nKN9MzV|grq3it>&xsyH_y)8@|mfkV-p=k*+<&!kkkK;Ko$0
zBt5k<c%zBFPrDyg3@ptdWf>J(KoWM^M(neF)1tUwFjtdPzqHZsAbXtwLY69iYxD6&
z$Q1&gd^D$N9ahHDQyr4!w*o5-9IRh_o;+5r#^nu&X8Pb!Ok^1o##rNn-9F{_#0GNI
z4WmmmpW#BpoMN_Kob<~V7dPD=cGgW+w;rB1Hfq!9a%XiEP-vPh_4K<{Pqt5Wxzc?r
zWo@<RUvcX4+TJ)-V}dZF@G9EFOp;Oy3=hqDZEvh($-;Y4=(L`h2za&!ZETUCNMZnf
z1zOo@a>&58R8_UDj5hmnxinxao(2VQGV0u&MC?OOmteNKbTJS}+<xfD)li%$r|tk$
zZ6|;#hd(WrrQr*YpLi7~1Y)B@_^D71!RDknB)Ag08S1+~B6H0#n1h_qZ6(PP96O{O
z=9wnoqZF#@srjVdo1oD38SjeW!K7T2Bp&G)5WEg(r^f&kob)-Qf5_&$b_PvG#fxXf
zB%Lb?M`gECmgfh$kgvoynigg|{{X7bpCiK^-H#uUJcW({@k~9^D;5Bnm3FZJarah6
zUgF#ijtT$=KkZU32?4sO%7FOID`k4CV?w@8lrH>6c&@?gG>YUY;8!Upq0I!*D}YT;
zNwU;!+B=9*!TeX4db{-N*kgC4Cr2ekdg0s#2*zofet0-4d#&1Ser+t1gEME+<IftJ
zK5DzSp4wv^kutdNBAA7P10djduXXibrL1*CEfi{#b_gBUj{2ufyVErT4AGDQ;Nyc{
z=hMAk9<5hW`aG_?(<i6-E>TjbUPI0XXsOo<vBWZQoYQ7e?IeSY_e?X!yry&2b<?ww
z1*<@1wUM0Qipvm)#64;Ci8ouF%s}&5%fv#!Mb<pgu~@CPFr(Ybw<eo%V=g_l+B}Nj
zNGeIm6+CGa!OlQ$6f978jF|SPUTD1*UDv4}f##w1f)OIW8Nn28hlai~?D&o<2g#%v
z1eAObIj#f-1wKc!T#XmZ4CTc>&)fEoIeg-p!b@yeZ!9@uL^P`xCyIrGJE&8PcSFdo
zCEFp+D3DD;XxbJmb6F`Fh4DVwAoME#08)}P8%f1fd(jy)7|!1&mGBP7+B|F|P@m;t
zfIg@xV)og+#>06PEVD?LKJ~Z*9p<5)<jl-nM(lG+L5(lPb`GjLX>)i8Z;&-C9@EA0
z{{Y1g3Msn{A6cSAYONtzS9>o9y2N_tMO%B5a{}dC`D4Xyty^pX4o4hNw<?!*b6ZG(
znE9$QN1H8N@Oe$7hB<)Sn$X)y(rslp$BL)bY-Q8$+BZ@GJQ_02Vh{n<Bv`a)q~;|j
z;&Dz^-yYM>C}`wqB2Wm$8a=OMggk-GS$0YwhYfHLo+wRc`LZeHdK5wp$9m*qfz_E1
zWinXyYBCjaF5Vb?jaN*#bCbmzBNrJb<SGeKO73ILR>%nCHcmRJZHDI?PARf~>6KAZ
zY2lVo7<WoS$Zd@Q&P`7lNn!!Nl|XHrHu8QnMWP09NXPq9O(wZ4g{E)6PS}fuR{n*3
z5$J0%Z)NWf<owr&{*7sgsp{bzVPhMs=nG;kq6(@)D8R2DlTTW?WIZHvUKJ3XYWLW&
zmpox>z`aB23y0JujJp-wK(9~r&a$@pWTa%0I2GlnvE8q{VgQq#MP&61iYX>-&_lUU
zJ*C8M*_85YuZ)bt*iuDH`#sa-MsfHQbZaUu#kOKkoL3w9d{v!gI_#@knPO0`xHPRa
zPT76{+CU}0>e+OQn}fNdBl{K7dVb}Oz?jdRRf`ng@e|(1)s&BW7VmC8TDemWCCAFH
zAJWpqH!70eO)>Oa^iUQuf7+kglIbveu&PYj`Wk32ZImz`<bD2Y&=w?1Qpx}ruQvTU
z)9<35Q#?%=2gvTW-le>I4IRv3nTq(X#+AQmIe0l5h_7;@c}J?Xc$?Hx#P9rD6h2Z#
z+@ug{i`4d0TwZ?U<r$kNHE*Qb6KWNS&MS$mac_EkoxC{IV1g_r_>?2fE#o<m1zb>Y
zMJ>JVT;(%X@+!=CFF5+FGWfeMaaUz!wXMOpF71Khr_?9BwpD9vw>*kE01wB_Na_BP
zGf49DZ4q<8U#9M-X(O4&)1F0oHJr&Kq`zQSnn4_qjH$(Ky)`2X7}?HFeAiRa^1pIX
z@Xlwc!BUbp5X^$#IL&6gTP?C$mvE)eRA#Mp`+XgmP8o+4=CezGaVvZB7EJMzU0r^z
zI5E@hm^6B4mUrP0-CoMlZH_WmJ=TZQSH@%!f<p?U5-eC&QC3<hkN%>DJPh}W^ZuKa
ztm!W&TU!XcT^k?V`%!|!pSn#!EwwuWvV{Z=%?+x=$!OB4=y|MF)~%?tGDo{;>x%T*
z^Fs_SeH?5!rIJ-1%A5PREOj|7<CHfw72U!#Sq7;^p&G%smCF|Ut6SS&egb3QfnHlb
zwDMdyO!QHMP+R*TSypJ9Y2;O0*6%!<LBY)xVx|Jo$(_cFNYH%f?%4PPQC$&7X7NA)
z$>X{X$_tO(GBaU(RjbTdKvo~sG)C$-F#e4y$oUH6mP0kMi<UW{?h9_fG)>u;tel!1
zW1c^gVTq#;$tB6d+l_Y-%|x!33{?*zn!;I0RvSeny_dHkF_WJ)YDso2Gp<#jToan@
zB23J{oMxlcruN1jgtD31KoB14)fXsOijyoaWB&lS<Z)TeT_m@hw0x^pn(Xe3H~OnR
zt6eVP8C;sPO?E7p9l9gor8%oRZ)X@oR1-sUImB(WQQAaT7V=@qaravCLgRJ_OOmF_
zK=C@i3<q=_!(<dZ)S$8s6ywbd?zt_TbJcxmpECfO7xywa6)b1lj5csZ7nsB><+4o*
z+hkwOSJ46WF&oCSA?L`d`59db5udNcS?V!vw+fyGQ>pe}<fP`Q$hjc&F{E2S`FS+O
zaIJuAi4rj+<MB|g#eO>TRai%tB;;~GG}xsHE=c1w6Uc^KZrUn8bNsyWDsG}tu(G*E
zvF8~2r$shMCAd9)DvH`-V+syG6iuwGW!epC+?uc>L8RKreSDVDWwP9w_Mc7Zap|@x
z8Ytb5m0~|jG_jL=BIoFu^rT>|kz2HwscwC;jIzE79#5f)U85b-8|L2;PGn`rJ=XfJ
z3JF{dssXN07xP@HmE`qYsB(UXng}eA9PkA=0($D0xC@Zq?N5)cd#U{pP%?0|{_qIF
z6u{VgikS!}^GA{kB9Z~zc&DVS7hpIP$84VKfnB)s_e)%I6PRw~af;$MbK;yCK?&Y!
zS)pVc@DD<nH*^L=7{mOJ6(aU=Qb7{BfClGo1y6NjHK<jPV3C?vk#3?Tz&^v?YosNB
z6~JH|oKxc}J|~)D2!jKvk|xJEraJ=I9x7<0kK1Kr$?#}N@DwAGTMQ`>>~EJMoE3_L
zid9VHpA@!1<TEHBaY*B8f#lML_%6}aG@Q0iRS^d!K-vXJsHgd4eN;#{Hgk%V*_nW%
z`5?`>Z;cUd!DUxrkPb~oJ{jCH4LUbnq!G`G5>B}yXvTiJT3hNGYTsJhsysvFK<>N-
zSxe&s4VwCgT(TFMMZ_+-R#RUv`hwl9HEH7?5UxP3PJB_tDv7QVa$JuA$`y9Oq=QhF
z*&QQ5RE!#~dF43EXXw<{vcweopUcfsF>|3@?#T;=HKMbT%xp;i0BXwUP8#Hu7+!^H
zEhkiUjn@Eqr%!;Ogt=(c4{v!VflRrP$jCy!V--4FrKX|oC=v{03T5TQvhSB*3dcC1
zb}<}m?X)@mg+-`AB$jf&0aQ~Emzpu08eLP|jsk=h&(%f)6*{ZQ!61W9o=gQu(`g-=
zn&ifz*@he*1qSmr(Cjb`C6H{Be8xsG=9?wKX#wvZYA12KdyXmzC0}$H&M91|luvVY
zy@II5D_QC2CcF{D5|$)ZKTfrl`u$;zOS>Nx?f#C@W74dUxCUZ5KUK}v>*J@7<((ZC
zidp5)LEf6st@QYV%Aut^pT%mdZRNRek*-BEMbqWHm~cY*HMOy_iq0^o27H?FdaY!o
zEz)++U8mB<Dw3oNrfDe}hL8f^RcNZJHi3%ZFf*ETe7`+cGE$V~$+;yJz=IvNJB_>=
zOO3hs(*|>jt-Y>-m8&F7)QGFJx0;8@6)anU&vfzZSq_gq;8I}jEu3Px%LCR0M#S#O
zrjw)uta0QMT)d1|1A&$_s@PM7rQy2>!;*2$JSvsOIj0~c3F?}Ekf@SM!FDHH3PyAH
zT`3vh)Mk=Sx*01TNXInb3l2D_@JZ*YcsN>-ci9-Hk%D@p+s;^KkhV@~%0@Zvrw_1n
zD{b45Fe!>hU{bT^<It&=*sLD*{{Z%=-ym&8az`de-PS3)j~S$9CBGHK?pz+|H_M=;
zBWFBQkgy{hQpL4-VVY}_TO*1qg<<c4Nn|C4;ng5z892^rGR3*`N~%cWj`rjYsUaH~
z;;MZ`pxSFT%(F(KOnh4$)wXbcLX?txU{hnt@y~R4WRz5uK98ThL!sX2`kO;KlMa3-
z?yH_C+F^hJ`mcKRK8FskX2{Atz&<Tr8Lnx!I$ivbOb&N(Ui;ELRcfQMf4P6`@*b(_
z-oxJy+W!FbRcltt*6s4a^I3^Zai$9r`K$e5+(O?IkGjo!i|%2oxn~WJ4%>3e@g8ax
zxoz#WahjS&KG9IT6Wv7{Mh7XD@@S?8bIa~Wv>X5^{{T;!M4=V(r@F5O0l9pUMd<Q~
zCr}Ovq6BR1Vp$o9<7lSX!M@>uZLALf)CqJWh&TGasEcT%lQOnLFyfOYU8Q+)tYr6a
zOKw9ngD1c=EYoECjwq>YWoW~^hcqnK;{zukcS(;rZkV|9Z!7f()#GP5JXHi_q`dI8
ztGZQ&$=t3+I2Do-tjs|hd8^rbCTUSYP<f#weZR+(OLB-NTt6Obf-G&78^1IKI^9^K
z9Bs`$G!aUhdShG)<nxkxq=Yigiu{6ms6=bwUDk<}<O+;DF|fcJo4O*^WoVQqE4R9a
zSYk0TZXJ4{)uP1A@2zYjy0-UGf5^P`Sc_?PH@4##HR@8T%@)S`=CB^7)7r}H$fM%N
zIjpTy8HC%&tbhZ7_@gY?0#0&2HA$ySf&p&o-Ebq^t--4zC`-($7$Wmkx`W1G{v$qU
zjgoELU*qP2)X;1N4MswwsrNtl(bl?E)$t2y3qF4p6cET`+<LD|^sC8ZrNt}}0&$W`
z)-7ItW;ToPV)q>xYY)?pQC$p+PzQ|Fg}>=ux~?RZQ~_S8=}%Uh%aya1EH-hRR_<6L
zk~}*Bk;gUVbzZNn#O{?HC}}Xh&kE7K4DsKh>MjI6K&`8K8%StDwS`!L#}%T~v;!$G
zZs$MUCXz1n`9j=6%z*b)!`3op{{X1XP}5_F?IQg<(ngPb@+>=ibzX;meGlwEhz*JZ
zPr1|GLpTff#aRKQ%A+sEUmWm$OB`R(gw$<Lpi0UP+|z9|3t%v-k@bq*%WtGA<PnM)
zu5{=@_F~@EPDP4t<wL2?Dw8u%=@#IHSV#O-ifB)_3kdviTxPpz%g1g)o~pv$=ThJm
z#a6Oni{TNuOSTOcr|u3iqNiU@+^{)Ok=0nmJ?Q0)O1C$e{&=fB_=&ZBl)v;F%n(ot
za{6xX0e~6&)wA8+md5dlkM|ek{{ZUoL-i5zm}~M1G>u9n_~3t49p0UFBwHq6bKuc$
zE}(P%e0@-N3wG*5&&jCuDJn>n&t_v>&@Ap$#QRo7<PlX?dQ5PkOymmE$!YD(T1G6#
z&S`?)GN%XqtCOn#0CoP_Hsa4H#;ng=Xd*-`txb<ilHMF5=C5XUhCHeg1sA8wZ*>;l
z1AS6r(yXqZ14~trF3f9NOoXsgO9D8MZc=M<`dL559m2VE=)(U1_P;N>(ETYs*cQG>
z&BqjMRZk3jR<}#E_N~==$2D5oW-eA!3CJ~bF}XHmTf{@-8Lj$F95B0GnX_sok#fqf
zRoy#AAdUDP;<B$5{HOdtky-<%-j^>dEX1B@n4+2}@F3e$HkLW(a||=f9BL*}^u0n+
zoE*{G4V~q^&@%zS?yp`ziVV_(A5|n$Y1?-+{M5&%!G+r3lFycEnHIIHdyszBK?BLE
zXrqV>vS&4-GU+l8n+`t}4EMTpOSU_G`KvNd6u)&9#m+q$S@fu;xH2Li_^IQuE5T94
zTu(gGkNbN~fcdB)wU{|+8#(b@ZCrHq@a(x}$oOoiyV0VUZ&9^})m6`@O$j+tR<>a_
zaZBwZleh}NmGD~~RO8<$^2@nog85_iq&)pqx?Hh<7K!-$R^9CF9PVgTu=9#{?BE?n
zxcbFeENRsmAE?@<j0*0!C4^7MTvJ88-?{$)qQrky=qUthc!_`Fxs^kWv9J76`*aWW
z4+<xr&ueod1xt3%^-Bhmdl@aT^Z2hr+l&Dk^Zx+Gx@0MgDE|P8e{YT%U+d-cd8O`y
zdvY>jC;h2l&@U~~O2yChUXHtu8$ka6YDY)tjz6lgMeVB>_0qnKB>E-fu;fRE$5lx`
zr8OyJE?^wjtBCmFasJe+0V;QK{?%qExb(%Ax^Kbb_a2hg7Xm1S8E5HzQ6mB4UDfJu
z3*&*2{ZwAT2cw_ru9Iz0t6ekZ?eXP;eR-<$8-{KGUiq$u=t%3de{}oV079AM`LD1(
zPYyuQv0euThHr2&RlpQQtH~-DzPdDx69qWr{nE&UE4g`g2P3Mfx^kB#hy)6=mGHwO
zs;$-*HTk3GJ}W+6f`D#i8;M@%aWTQibsU72%|z~Vo~t`!5|$j6?vY-hcFlMCGfQNk
z%So9f&g#3;GcCv-Nfnrlh#=Ol(QEH&Q3J><D*Rt6KztuS`V!G?w0MDD#%n~OH}1z>
zim%Yew__5HFoKQ+m=XpnzLjudWRnyvxdsIw832=9g;{WXS7+u9I36kD226!OE8c10
z4$k<fk%P&QwJW{=7~-QuOh~}oIH@O$fsmwU%{6v1=OiDhdr2c8MmbuX*#uVDGDb<M
zBN<)U!Ob;-;}oOfqv-KP5F!<giDw!9syPa?kr?8*H?}|>9_i<12GfFRe<2}Z@))Vz
zNdUVX{%IgZ4q8Kl^-sH%WZSiH1xx0LF*2?hSdXeD<e!I<LgN@2p;VFvd{oULU=*mu
zM!6z5s2~z+ztx&UkT&9-8t*tf3S_JUq5zUkF<e31HjeyMS4E;z?R=@aY?4g)bt*@b
zOKQR%Bs(@R#YXN~ez8uO2$UV$uqhap7yto6KVb)&;Kn#oMLs9qfxhZMR4I*+5%*F@
z?T&^qR7N?YmC3)4(Q}qJjU>7IIsgIW^HUa2{{U*CQMvQgao}SaHGglCl0(Vl)074T
z4lzwsA##4H`&DBNoMxurP{X*!(e?;AubMwp;`TjJJYZlFO?@yJDetjgJASc5Km4g@
z2cN3Jtm!iRMkYByj3(drr68+-qjqXZS|J!jA2Z!S8#=HkpyTea*>P#6$YQ%dI{Fo;
ziOlZ20sYoC$g<9|G32*ZwzI<`mQbe^WM2V6aM}qQA^8-yODs#Y^1ak*{{SKom8V>L
z&kl2ujAnxvWN!rSzv<iLQ!eG&3J;gjHQH(+$UHElxK#kQdCyfD42G<L+_JL(Nvf-n
zBrFk|a@B{Zh)J7q<eY<46Wo-*+)Y&EN)Erhidfb#pkR2a_?B5E3WKwOLr(Z0q3*Sx
zr&?T^hNQDfxrsS7lRjmR4otrW7nW)$+<u;Vc`R*K`sD$c#~B@0rTTwQlTe%h4&NrJ
zbgNgnxQ+;yj<~N;(JWI>v{VW=W0PJZU8`<RPquq)7L`URE|6V~idn*_fOXv#aCoMp
zZW*T>V!0eXPLD?+=bBJEJ}H<vr$$EMlf?xtX_27--MW+kjCrmZ4slLwXRIFTRrU)J
zK?jjjCmH*wgA<Z?rzbp<O%|6z$ylH;VoA+86?<xo<hKWkbFdwEML)1JvGO_UkwXPF
zz!v23PRPa><Laj!kaSEjhU%MuNg|jz=+lTGQQc@1M&n~{>{ov-`==j}=9C2kx~auJ
zhq0s)=AXff9A=xbj!khOD93bq2V!7@$l|%UkZrCkNyAq%j=`mrZ=nAGV_&{0;r=^!
z8fhMB4isjS$ZEyle{{JDKe}Ngoa2g8!>Tv*N)<^9_f2NO=9FX%=9=UYoO40l1tB;6
z3YIn?ebfuXbJa>D3!D%3rH350884wrD>ow*CRbp^MS@p2#UkV!W}Y&AF<L~!?y?`K
zUYn0qv3r;k6QIu?Ygv)jc%>LQQ^+-|PpFm|sm1b&vf^1H<1U%<-nf%RzTKA&xdb(1
zor=s%m?u6f+<#d8Fkp7nZI=qbmcZ(~CSE0VZU{Zs(R4c4>TxNf!)vsUPm#yLGcz-Q
zAN{GU8Baug&}}By1$+Cc{pNB!n%tRzq5lB-tOnDZ_^F^pTcrh#?9}=djm7QgNI=i#
zx@bkVs_{#b>fF)Xu7xjRRx`#oWPMh~!UXm}BQ1)`*~GKlM9H`}f%jV!llI$D)8tpN
zSu?+pn=D&)Pit<DNM-;Jx~taVjYea209EyrF<rt@Ps{G9{YPVO0Z}A`ek!>0A#uef
zX78kFEt)WR#b&OdNS6wK$WIl2bR32;u26MZJwoNbb+n8XHPP^EW9}pNn0to}PYg){
zA`cwqqPm;ih;A8g28DuRBm<+>Ko&?ut+#=XH7&ej-2t0-JW!AT(%_w=%~_$g9^bTn
zVDnM1qg8A|*eFs#0-|CWKq^Tz`%%8xR2-aUx@KtMAPljl7!0_LNFy2VHJkNishZn*
z9l&_4NKbYTm>H^@=s=12%KcV+h;(`Fu>wLoin+3U4Ue2vb^Ia=qU+@KQ`*SPKqs0=
zu}ip>b`hL|Ra&YtE^~q4QFe|@n=_h+S_%|4b4>an=>GtuU#6p6v0YbDVlBbQU%KAj
zX;wCJ8(5)H5^y-IFX<oZGwJrpqueHAJSnZ^#n<mAX;=83Yv*3ClUp5ASs%y2-O|Ss
z$8qcm#>(2p<lR96nFm7^S*q%uqW=J?{^Y3;j+9mUzc|J!P2t8QP)GGyp{|@Gx3p;X
zlXiB4>ZoN=71${yZ1k&j)uJzIR$O)~12&Iqa^R@g$H=ct{VKBm0Hw1d{DY1Ot2Fqu
z`l+>4{{ZwRSxXwThK)LpmA*gPm-Z4yKlFd`UWy75a)T8di31<~C8~2nO^rRW5;<ny
zDD&pBkgiN8AJsZLFCNA}IIa0eHsjh;Ob}b+{{T!@f7C(^ZINc_KGB8!yo-{kc&GbU
z2L({p)In|!F^W?R1Ng=&zx5ekfw=lHq6?$)Du1d^V(WMCIIXJ120!%0C`dp502r%R
zs6ViO;KTmf=$wQ<nsP&No>11{BqxCwrih>inAV;;*H>tl_%N++bkB{13R!Gsc>e&2
z&2Cyde*Ljbc%VHX6tdM$xRMHM==0fhdzb$JNi-z7T#&vPSEc^hKhgzdJ$n?8+oQqp
zf5l8V@@LAE>5VM=W5JqxJhsqB^l2m05npg2PvWU%)+DxNGJ;z)Q~HD$K@ohsS6zBL
zPmGME>m=7khTa*V+INyaCW?8a5(1$s52<>l^)0|ii6^hcLu=|?K|pye_i<Ol(`3K-
zO5Ubl@i&uRx*wXxDg&;7N`{DI#d-d(u4`I>Q4O}hJ0hz!ICUGBhuD$gbNs6B(6!b^
zMW~mUo{x2|N1QaVjQ(gD^#~vN&Y*vF=c30@Ss2SBxKZEcj<K`3y|$7j4jg|IL-bO7
zIg#ob=^A;ibjD!PO_BLD{{Y@;F@PE40QFc|^nFArx6Fl!<MBa0hkYZYW@%0i82wU9
z(_r5+3e?H?JvaB7N~Z1cVwl&udnja@6ZouL{+_#EZ`+aDFfe)UrPIAX>_<PpG=@?>
zBBS)YZ}|)Qnf)6DzqMb8oq6$^>2(b@6)$mefDZYrj2d0a+B9kz<UEExD!Wwl_2uQE
zwvtH_M)D~aq~PvSY>!cq_%xIHyF%Q;D}@Dq%?ESqy$;`KYm{Pp!RoQFewAtxK3e8Y
zr_N~je@87{37Yly{{VGj^z9}7(mc67%h&bhjtd{&LKF;+X}U-1K8ZcYw>0Pmah%p-
zZ_)@=%882)?ugUBM(xWP46X6zto;U+E+Q|vlgmBl)7pKGsi!6ET%(Ms{8f<j^{#Rh
zP~WFkg2U5s*@i9c0HZadQ_Xn2enzsmrAp1PTomI%wH>IgIu)TgE1I>C<*BT2F`CA+
zH2NQDSTq|V+^DW?7Rg2eW~@g7k&RjUsaFUaKP70<tkmw<DL?5pg^$EllMoZWY9ZZM
zJxtXgs30HuRjA`&rT+k?*{^Yf{?u!CflOogqxz}Dn-e|<hiA4K1_3l&!FH$|<W*gh
znNtAZ3NlFw#s`sKf4&bOiP1oapy7v_V=~8YfglY~8DIT8=gmQI<x26Ad!XM0A$f~#
z&n;E=i@gr>j8xYo<g)S21$4~Gj`LaP$_gvlfG3)Pj^j>o@+PAsgIPut_~(k|C;h3p
z$))H0s1S1JJyyf?=pXvj0G>BiIa;qhA$Hc<v=6xBaI5t0riGf|`nJeo*#eRL1r)rD
zHv<)0pv~{;khXh4%~-Plp+|M-OhjN1-DFeMST=s?Lj-<mMM84S*zyHBZ2(wH#@%vi
zR*lHr{{V`O95?$^i0oKrA!=JplrS6;F_TjTXH^P0rr{NdT$*x7>T(4Z{Gty~04n3<
zOCH5<mm;}9#^=sWIgxlcG>L|Xw%6r)`ld~U$K5_O+ROA!7pXyQ2tgZeax!XeN=Xv9
z1BJt9f7cZFhU0@s6;*P-%9j9qv+|<a@C1jt9i-x$jih+ZD>GvR)6z386n9FSU51A&
zSwh7B093Fbc>=c-P0S4ivKAwssuU7Ap&4#gg4Gh?Vw)@S0P{;OB$TvCw?9<K%X5ri
z{nUeMi~>QT`ym(yaRl>S*9Md$7biRlX=W|g6vzaKzzh+b)AFO06$s5d44zL_E0Pic
zfyZW)rrc!r-A9H$AZbWaM<#^?kdd4NlSoyF!L9{SgI5O2k`D)}9FSo<K`+1n*Uew4
z+JapAu67N=v0C@<Sn6@<+9k!rkF=_twesGpKkqd;u9)OI6I|_Nxk%B4abrObk{B&<
z+#3Ut=Ad=|08*X>KGs5aZsmUK5omNR3IOT}Va;l^i=%ZCGpPYZWdzZ@A|(NTHKEZZ
zD{JkpG51dvL|*%wh)}$J(}=j+l^^3CC_5tij!r<LC!L|UY`Eu~Q5ax-vjO~S8~GVB
z2Rvq$#_ksRu*v#0RjKM~9oXZJfTfe5nV(Y6A-D!kKNUwKg6AWOvGo<g+)NvzRNII8
ziLA4bQYN=onstpvNjMelev8xA!%B@p;oM}`ntBIL{pPN&Mp8a&+&w#JZ!cqo7l1`@
zeN$0OY3My8Pk%qNZ|EH?BxdSW<n@}_BLGMug45z#dt`}5FesSc4TcrtvSr^ZJ#=yH
zjT0vZow(%FK^d+&C#qJyfmS(-4k=C*LGwsQ9rI0boPAJy5OF85PRn3olOx*#ky{*&
zMIM+3H-TK-AH_9yF~}WLw;baYJLD8D-~~G$9QmfP80wydO)~movxJ;u7&OueZfO~^
z*s5K7)Y?9Zh41fTOn;|$H7U!E$y8k&sVC~VIVU+4<9?z2and0&N2a4l%h5en8&dsh
z*KUc9-Bkhlt#I`G^661TwOC`@KACAy6Y^31^v`}H+^L`c09aSa_aCT!pq!HTQZ9a)
zgKu2xJpTaSP5%J#Q?E+O`(S#^Nc#D1Q1v38{{U*FQXNUiKkr{3L+ibMGnntCC-T*4
zr+%w?pHDm5Q6L@|(=STP)r$4FlC$cjYEn)<K5M}}HTvqZFpE=%88{_t*1a+6ttVW8
zUe4(WKk3gki&3M`f&AmaqYk4MZ`ms}4oLugS7c)bxfn6P?veZCRwwu_{0sh^QYgc6
zYmoCvBAo8<C_bzt?y0x=b5NcQa+N&!rDg!;owY7SNuhC`PZi2nIr^rn78SusG#@;m
z{{Um&P-6VkQy9%T<HUIwu0l583O^?<K&rSGBOO;MC-I8na6DHl91x<N((DCysPy&J
zw5jc#K@6uA@<*%eB-6FY?WK&a*Es&G>~oO3*Mxud3F+mAb=BfdABMoL=b`I8oZj2%
zoUc~Wtk`!Y$~<TUW(<qK>`^fwY1GC$@j@%8e~v)&SDu!&7dnFS4>j($q^$7yJ}9))
zH2bI-CPvt&lUD;oj^=ONl*^uTS_!nohfCh%XB8HUa~?3?#cLzT+^oqbDMrQ9^!UU@
zS}?LFMu`zmyK^q^Dbcdr&z<{6;;k*AINq(59x6UclPRIqVY9d^FT#h_S+!%j7Rru~
z-C5q~v0Xb&C?9|+*0!+Oz@bJl_d}u7xiNZ-a@j_AjEcqH0d;ijFaSK)r}~sY_V2ii
z5nfYsk$WN|g<RImSzM#fW|+$R9Cr{S&jYGRVnD>nAOut~&JkEYEPsimwLtHQPkm;s
zOk%=6ZJ}l@$m{b=(`5HAK-xghbo)?{vB=2Bnu^83Umdu{Xpr04$ZeG$X&~`LUohGv
zL%4aNEalvY8FIMlrJh7{h4_KuoiT|cDI`2D8_iT+081-%njo2XlBbM#rbi%UXxTwD
zvI;VK{1FgY7dZ1-_~h9x-1Az!VGK@L)8qA7T}~fvVIt>gtlmP)TS?M1C3$bUgHw^+
zR5NkrhqNJ-0BLXJQh*HPcTHd|eH23Lu#{cEkzVa&F&Oz{BdYLkM6pe7A8+JKax2|z
z^1x^6ynp!{sl@Dhf9+wh5x<_QLVN+mGCU6zGUIM*%Jy`THJ;+Y*Qa_jm?JI89aoxJ
zqizIp39nG}a_zQE)=Zgaj!CCON-4zDoa5@aWYYftAxHtLa!SnT;weBLYmIQEwP7Q2
zGm1>{ic)@fG{*j4ilM%2bUQp(DdRb=11n7j83v8*ruqpca54=rxXm@XuE*U8WRu|$
zMGjW{Q(b{PR}xPGk&h#aDg0@&S0vFt81q%y#j@Ohv##Y7Y7TQwl|ft)oY!;HGs-KD
z(MDJ%W#sgplRIRx;MHBvrR-yyXu~M#wG&yL`BoripB0L2JF-vRdSCS%QdIjhtWriT
z{WER61_KAhPjBhVMFn<(KJi+H)3U^pX=AbpkaJZ(QpA<e-rS#)EV>P}PFn*N)9AJb
z2WNf-YE7Ds#V73J&mdJF>hbXrmLJKKO&-Nmud~HUwl;7e_Td1JIITFfg?s}@_R$Ps
z0ji&?O)F7DdkOh75<zYM0H|RbzVT4Dn`~Fy5e|FewM_&*QER_%7~mkq73$J`qz_>(
zjHH%Os9mw~Nj{$u3d<iS@mis4ACwi%Y@7{)kHuE~Wv5hz_6(uZ)lNd={i#;QCtykf
z!>%h*ZBd8u6tP;yBvY_b%hx%mp#GcfmA0B}j8rr~6HlAd=)~*=LFW~sQM3R!6xQ8b
zjGniulXU40utVs{Kc=KIle;6uO&*(Z5_=xs2eVpa2OMUbAt3WcKDDZf-C_11_@I3<
z*jn2AV~Qs{a%+b;$)-pEY$?rir;+4uB#RQR2THg+9McH=(u|S?N$fr|nlZ%kw(-Q9
zlzU_&jDy`ZfzC}3w&aZDQ(6*Fbnio%{{S8V?#JvL<n}4N4ydNLIN;Mox$<e3r|KUY
zLGHj6xHO0JMH_LR=_IxrbDEFSbMwG`=%1Y$Ou5RA>cynn84by*D+!(%0*nmzS1TZB
z_c+ac+79A*5)*Pk#&PvaEAEdWxuNIT9G>Xp8gIG3nwP-_T)s{eV;%lzH)2D0sU+N7
zobgc;-g-4#Tm-L`a7%Yk9Fx^gbUs{lP;T*AA7dTSuFw2ZW{~qhgT^yOSxfCqj5`N3
zD)owVY!$Oj7f!%^JNjLz4MWqh5ZPtm*0>Yy4cviV1NuMeWzzLjwVm+<ha8&v5gr+l
zWs3zEabA-SML4F6Q;j+nMv6nh=D26c%`Vkd&fMq!02JND2q5OJLK2Oj40dW&-s5VH
zssQEA?YxSaWZX7^ikCqgxe=3r-8>wq`>5lLlYnYQftSy8wu4NSc>tdkDRdxlP$IJS
zJE!g2{Jhdtkeb;HrLb^3RBqWM5=#y%l(FC(Q;5JMj%Y3&gpVW){Zz_E&_KyF(xYd6
z(!;p0Rpg2{YRObe${6hn&2(g=U>>QGN`2fh$AevfM*ZLIK`*irmzH1Vas@ser4H^7
ziiJy#sq(uQ2N>fNYr-KUB>^hugViH4mkaew*mncEoV!UN^51mTPQXZoUC1qf1#)uX
zR18r2aK{Fvj|04ln@M33Ng4PA86IjfS0n4qHZ;o&HVrwKYi#WO(NRfoOBC%63Z5&9
zm2CJl))?S+De_3$6cN=%$dMZY8zk2jz{waCgqArqN$WjfG`%883xOdRQA;_fM%d9?
zogQcU)#@*LZ~9~sd{_Bk*O9qBuUV^KP|G#mqb$<7W@DUHX5=GiIIbQ^xyiO{OKPGA
z&ipHPQe>oi6rOvgKHb<K%kHHTE^t?$CaVn#Y(Y47AY!y$o{%aP&TBTRQF0h(HGQB+
zr5;9M#T4j;@=nmnyC(<5Hu7I}3ugols7deR5(Xd@6<MlY$?t;^%AW#*nAA%x*@G>d
zW~lWW)M7|tNv7)8$J+`>1o*0Fp97w1nPjn&8+$pC!Ol%X(93h56vuTflU)h-E))+W
zRWf5G$?E?Argn=So%@ZrEya6>>Ai-;YqwwOytmQTqCGa+5HMH@_is&Gt*(<2W0Jhr
zkM&I4zsc>rFAYV03sH>c6(>h0x@=0!GCCF6=Pk`}*GFlRs{%zi_XE!r26^J7Q@hVq
z6s^#XBZGh`bL4eM;Q`Jow~e?QP`7-GUq=SqX;&cQJX26);e6A|ka6aszR1M{j*Tk@
zIBJX>9%?}$JQ|8i%N3@CC&})ko_Ouxd1Y{jqa>3~kgRCZIRRIaPm1~5_2>0L%(7^j
zJlj=%GCH2?PMb|VMATx6bLWN_IY_qsRsCm<!KKsmMh_^+G6U6o`RWf-YP!wCOMQ7H
zrH@68Q{7xV(lfgjVsc5L$**Cf^!!?Q@U8uxPh09)Gs>3;xzU?>V9oyknzYklGTjh4
z0<+pkkk5tBHRzs^(q>>~DUj6qiiNOgB;`m7TSK0>s2<Hh&f48gpuWKBx%(9a`Y==_
zmle)g4y_oGZ?bNx9Y!*-mhp<z9VSPRxEu<g^;{C$+Of_BUyfHKiZZ>hW?5!n#|O=5
zy(8*t-4^;e?Wc9xK_flZMP%gE@ID*5>q{JG8j|4Q<IOB`-4=bP`eW-We^KdZ(l8eQ
z42;&<aM|2z<L}ZhQB9_62^Y9RG3zz;Z>KdF^(!=xACVl_hxI)d>GFJ64EEX`PwH_g
zc`SxQV;oenl41&=W67pS4sa>CImL2gYST7%Ol~pFD-4YHT=@r)#V;+5y-;Z`1{!w(
zm8oK5jFDVABh4UY5s{HbG1&K5A0m_4&lJTZQm#q)S26MhGk3`-Qb}4=BnoCp@rrr^
zd7~ac#bP_Guj+o69;xYDq$(K3g_gAM!($XSr3jl&o+HR)BmV#uWG3M>{($0~X!x4*
ze0Ng8jinfhx%8FEx7A^0YyxO+TSNPAQ(r;`+(>G>NSprg1D=l+^qe_8%t~Kq`QEc0
z%#w-ix|P6|1yz@ZTBwXYoS}CXR~f23H&JOMjxGLi$?;lihPGXbPFSx7lgZ0z(H@&~
zi{EUMgIfJMKt{n>3yEGi1hI_lJmR&wOv`Z;Q^MTwxKkCSMu}1~GJFcB)#QxY$sjlj
zQ)?HdC{<%PEIF)?s4t#7SA`Bo-`zqrd;nf=yay~o=CHcenYX@t9m6%c(hTz3^K8fH
z)*scA%9@+yO0YF%7s-A*x&dTT<7qpOkE$}>JSpAtVbxYO%Q0CB;lAjoZlwco9G^9*
zGT_uFYcXzqsyjsusUsodJc^U!EwIVj-hVY6i4oogbGIHo>60O@vXzog@-_#Gk);yM
z$^gfMRh#|8xoK8Ht&rSP*h9>LRD#$iiiAj!wsIKMGS3i%sRfvGnyvR4q%5S2P;7+S
zwb-5}48I{%5~9xo2L4_s``0Eml!~a1>319tm^GeinBcItw8aR)6&&+&c+E+ANqz0v
zs`;b=#%NyvYK<46+EeNXOiRw;n)llQkfisD@DD>YjjluN{{SMxHS`9QrvN_(73BW_
z$SLH3*3f>&9IKWXG)aVQ%?$`9Y<*G0FkDvy4mC9<(Y{WoOE)UR2aMOMdP#rnQ2mPX
z`zY};splrWU(+kH-Y`C^jj2^77F_=TZKC%aR|;?gl4$@Zj}@9ophHWZE4J4KKUAtR
zzKLjbQ3pBADS&S7x-+%0L@zzpYe}cYml{oSQ#nd3fW)9uEGm1c<Qy7}daV-DV~moa
zRNF%uH7#B3B{-<famFfspC2v76Lb3`5vVke66ZLnQ}al~6U{HNiNm1om5oFh;8MB_
z;}tAPnq;IeRksc(!ZwhOO|ln_SoMl>#Jg!ZH76pSMlIbDx6u<u4~)}HV;xZfkcvAa
zTq15D_Q4c9(ExLbHyGxbD(?ofYH_JSrP%P5Fg=O4K5KyE_|muvJLZ@i*9GdMkK)a0
zH^~gJmwb*fQUt)krvi7=oq?tIp#DLp(H_tT6y2FTRH=++qbKA@IHaCv#-%|?^e4CG
zl~e*brQAyQNW9}6)o(KXC^hy<&Bz9t*{&cS%^^N19oHhERf>-4Ciyi;4n;<IG>%TI
zz{zqy18)MGk+*@(J0or>t&WzRt~df83htBoqFhrgz){HNq>$I3&|%q!pC(MQxeWt~
zW<FIY2X1O6Cp-%1I#g7SJW>uv)pLIuMLbik$TXjoBb<ujK2(e_DK3yt%<QtZeATQ4
z-q0M?N!{7*i?p4JY|b%XN`$L}&$LY@cHs9>tAK=gu9`qbLFS>}JI6HbkVfY`;-X`;
zV4Tz4i5*lK=RMNd2<92_)du-y;8E<wxW*_cMjYVzswTM#8l#SDk0Sz;s^Bszu$-oy
z=ZfMn-8cXRK!#dK@v_K`f~O$Ywtkm-_9>u`Ow=UW<Aq{t!y#k01W{2<BrLBR6=BFC
zx*A;X6Z{n9f|2$FatDkuw&h{L6(m3eAY!}^(4VQ7I(bW7Oqn4ZZayp6bU#vQdbC1I
zTa?Ki6J2=ajzwvioLqY^qe3uO{psUA?^wtFX_o96f0!fjPLfT|0x3BBQ{C_==Hrzl
zQ`2$5V$_j^+lJw(%*><=aX~);B|_bvsJnZ1mLnnY)@W#gk}}+Rt5>*ztinVv!?TK~
zB%0|AkP5HM80MyE!lf{<;g^Hvk)+<(Cz_Vo2xbZg$GVa{MTY+XRY$azz~+%MWh%!Q
zt8Fny9i@)(l4@B}Gz;yL*%>s&9pvQG)O<!bG%SLn0ze&~x>*}x>v5(!l^LmmH~<h1
zXz)Q5@^@pOs!Xo@sKBKJZYp>pn~Y~Ciey4U5aEVF^GhZaWO5BKo#+B^X-i>3p2u`3
zgovAfU~ok^fCfqCnz>~-@kn#+0gR0IMLJ}z1uD!74hP*cEJo64!Op^ms)3vq?*gN3
zwnV8CT)M*va&fd|n(-ghZF%(#SJ{n;iZhDaex~}3bo&q5LnrxPgO955+uMv1?eb>k
zxq7VD_M=uPc`5a<#_SZ}nyV@@RG*$`txn=!a4{JbTRg-_8oBO8!7?|=Mg~PkB#X#i
z=y7xl@k*g&3`1=+yMsXqJ`@AaYU0X8j@;mYF<IDukvYyNyJ$5Fx7lwCoc<{#lDir(
zZ=k8}nSlz~=7yg2_TxAruB<&9tK49)_QCG1t$$9@xw*SJ^I0dX#~v)x4IW7=qs^|S
z%M#DJglV|!SF*+Wd!wMk$-1XMr}_{B<C@O(_|baw{EYDCU{7>jjyLL1uX$l!!~X#0
z??!Te)Be=kf75FSZqjIHU{vQAq<*1|svbU{ku=SBp{>NXR&kNQ0==r-1W=gZ3i9tq
z*`<!58_M4W09U3#eQ7B8IIkzKZgWi@%hS?~DTQT$_?!xOIb0qMBtcnoo@<*Vek-2S
zO3mB~0SA-CD;$3libI85;*ftW#+C8F2^io3OCB;e^-+>Z{8Es>(qA?T36RE{N_R~R
z*(^;&PUFa@D(8Y~$2;gvv@Ubcn&Ia?({Z)Hr?AUs@k&N{x9h*F{{Z}+^wZnh$VJ4%
z9r&-1H5<8ZZXRh^v#}(TUbFuI)RwZ>-l{gL*+_=mSB^+|uV?7me{TYt?VeNCwHvbg
zYosvpXstTU?=BWK2L$6a3^AEyR31%wPo;DKZDWxm%K`~C)yt7%iN?i^<dE$f(K4dM
z>V$^moL8!Pf79O0sv-fLb5|aZjt}~B$2>!UR_PK%Nsr>S#m$yee4UI?jy1tjbaNQ~
zLaOid$X&2pW~cR!sVy|SVy02<9tKTS`eyI<<9*8!zxcIPwbhoXt0B3tNK~mDeb<@(
zr=UnN891+FzqVvkpPG0zfc4Ik9o4(xdBHVq2~^W!=UbrCJWWka8K~}JXl@=h;HjmJ
zxvu<Oo_VTe&WCi?*U-ot7-3&kewXUIlH8a0$N;aBEzTS;@N4K#(!0ie7T!$qrntVf
zjXY{cqx2k)bfA3*SQDJ&RBUj5G&O|AYdD+9S||>FM+UrP{#K7nl?WJ~#%apIX{>)d
zQ#S#eRZX-SvPU`OaZLl9=ZcOg42(k#DrLq<sRpug7Pjy=cQp9ho~ez(=Nx^~$Nagg
zW5WYn5)fGDibW(oDN6uyPc;~Pwn?iUAyRCSh~JZ4uriU34KpJr%|1B-FwF?n@<yF-
ze1-bmY_`6%xd<`6i8YJTzx$OW{{YQzfBJ~{`q|Gw=Df}uhDQV*tLPq^E!t?F7u920
zi5rV;>2i*lt*k7QTSm=|*sn9uwQ?>aNq>nvn)F>Iqnb$eKg5nJON(8cJgaI7j@3#!
zWaWnxPo|}w%2RIYNY8Y#G*;JWuKxhat}SG>C`({xjMOM)ez7}Aak#nKJPM@LpqAn^
zkR7TIUv*=4V_U`nx2|YSe(ZZNLlW(Bd{JDIDVnV&7^2Zb-~txBOV(2~y6wOi!D{vM
zB#`=Xw>QWoc!s|reMRIAmMzU$iFVBxxd)SUafM+=LN`F6NX=KXw1G>uaorPbBBO>@
z$g7hs3J&POPmj8qG?HnIjgN@q6b!Pn%ai<1nkLRiQV<`BJ=1{GX^gT!NEvW3iWSS3
zVY>uU**n6!qVwHFbC-Sjq7kdPztn#;j!J^SjCE1T?hcP0Y4I-g*g2@!K(!$!wSeCm
zr&TJ<RY}0ft9!&~)wdohmFIHpwXhEcs++K)X|_pik;_#a#H@rKYUV$RgU~fusYSAF
z>(f;>GcNrG(*FS0?fs*JxMseI)AB;@JmZ@AztG}V^#!m-!QLzB9W_qH4c<AgFYEsR
zz6V#+{{UkRwg=tFqc;-ZHAM_z8#(5exk1fwBHk$f0BqgFm|d?I3;zJ?3iWSJpYTWh
zSCrbH`?TlCuT{`N{k&v-R~uc<RYj}z7uoL=gY#T~*B&^ma=yw%4T4W~G$%Y7ADUgn
zat<o&8QwOV7E!U&C`oggm`E55P;xmvnsvMUuxm~YYFVm0CJAG>+Y2qmDTDD!PDd2g
zKZ@+RT#6DDn&upMq;Bh*s#nc~<j0yvrBg_9NCJc66i*}6khK^aiOoIu;<*NR6`?kg
zLjFdO2Q<DZBpXgEfdjEsslClGotzrxIi$$kaZPG(a-+d3k=Wz9j~tK>bmDlZ*E_RS
z<dSW0GC;GY)*_?F)e&|wH+iTO@rv?#%qkh$@NUN|jf)!qNUmVG@0#O<?wHgR+#;gT
z)o#V$^-XkO-D0>?i<bUJG!u-~OM-H`5OG|s&(W@6Ht=aMaYZK=NSqHN$)saSdIL0s
zaxqHdPJkdhaZMz*7^xcr08+0c;8m*|{kAURJaJEjH!!A?fNFb1{{U8M-5wuvDvRhD
zCAKz6=M=0suKxg4anG9dF^Wo&hWR-`BNWUX*zr#k!hTfkB#{u3fr^X+ihHo?kQxv7
z1$0C~-CZH#oZPiGPAPmcADWXe3&5dZCx0gbinv3&AP_$^8OS95Yv~;`fEZA(Bb?M{
zDso0@cu06fM>>(kSkMS$eE7vRP@J68<Xy|hVxhy2mS_-Aa8KP0a6Vh8h8bMvnq=|j
zI!U4+Mj(y_a-E|zm^r1*P%*@o=9ZK&0~n+PeN-VVPlJQzr}51Qz2=MHsK?b^je(8c
zXv-Z=^2Sj1*A9ez9w-;4qgAn!T?*Bk{Uh~5U9bw0OYR`8W|#WbhzIU13lZkLO|o-Q
z^H%<=BJwN66X<w9S+s|J(%Z7s*FUW~F3cslIQ-Ym?oK(TgG()TODD4v-)?_gEmWUs
z&Ck_WP5R$>Nf29=&3K?=nzzt2_q?1j2VkWAW@&L@_M(qs^zZ5~sdb$~C}S8#Jd9Vk
zX;8>*Qd!UW3W96PzeoKtv4MRocnkb;JFi%0DTNi%!@p<AluECG%HJ%JfGM{C97v1_
zJ&KI6VncQKriKH7)o4qb5GN>NRtF7GF(Blc<<If}?WA|e#wy6tU?%roNWz7{^HN)G
zb|5}KCWnON;G9#T1Qz6ql!!{oM1*e1q$=&!NFCQKh@9u2%^zcZ6c9-niCDjrO7hnc
z<&Ck`zJU}48@eN^=)q<z{N|k4EK3dzBIK~zc&8LD3wAcoDzFRWP`u;h*%|7eD{c6z
zupUJ~a#)e|NfhqbM@^B@7(ObyS-po(u$Jml0;1-La(Vf6-FZLj7pYmo>9&kYjvR^_
zyph2RzC(#QvmxqzY-*acv&czYowY?Z)M1Hlki|zD{{W1$^;K64``97)*PWD{otu&(
zxw+jT&spN4-5EwQ43pJK9l3VPjwqXbGHZ!MamK_RxTJh8MoQfxp%dqT39HQ`K)uvh
zZH=Q(vs&Lz`c~fDEVm##{TkO=*<k|0ffRiT=jYbO93!Kr(8_O<Go<=L&eav>{{SOa
zqfXP6R10fW0C~k(+3DkYg+_Kijw?^6`hM}sGXWfD>b(A=)OR|Iv(e!6hX~$Eiv0Fx
zEi}8SoDNh~oLVv714hsNR_4b|wSqFF7C55fiX;Gp6`pxAPt5`l#|d(+7^L+6P!kCh
z!RhOdcVNI@>bFf8CnJGe(F1}2;;57x?V7P?Q06g4NB)4(sK^Hco+z7rAvOl~^NQ0*
zpyQA3p^XZ;9t{NteU(ag+A~+0JMP~UZI8uuy{xGjZ;~?jt#l|tk&Y=nq-^FeXl<94
zI-F9ZiVH1H?hI}_g<M){(aZv|+Qy^0(~;Z|HmUlmYuz^P)yu{5TFsUXK34|qHdQrO
z&(k%V_W{mn!L!CIA7iXdZu@7*1MalCee_pK2w_>J4<^o6Xg3%c%{&dmIqI09PDW|N
z;y9(R{)J?y6kwjKw&#_oz+y8`C6z$~iab98P>14q9Mfcdu}FTD{{Z+?l0{;2Ii$Ho
zdr1a<Or%tbzL_87_@nh^W}ntq=Q+frRxOpXc(0;8NBT^fp0{&!(>7#sXlVZcLv5LI
zZWvdw(|VQ+c%v5l9#>iEneyh8k>cKzwM&geBmV%kd$;NSp2ppvXtFcGrs?0JuvyL}
zOP1U#(LFcm<<g-DTX6%rO;4y|$@^I_dUjlWqFpOT+JK_x9w;AIUM`EK%XutiQ52qh
z*1E~o4yvct+Dty9(j~Bv?G7+29CKs7S$wqFJR{_vUV5H=dsJzZvP7riE6~45b+o#$
zTU(X{ryrU!Z_tM_5|=+5@y%I!fAn_qNxE6$x;O`sn%_NUnppOxW5=M>$0p{DoYo~^
z4+Mfp$m*!|T_LAJ<}#~}YiXtEvs%Cr<lJ#nPotSe8SoE^=l-Gz@Xg<*$@F}a`h}+p
zJ$g|Z6;f+Biboi)r~P&MV!GmH5{n?{))r6D{aFS?sDIwNxIIq_ic)7cH>KxtZYc6w
zRVAz4zeqQK(9|CWvv2xCtU%!9NUucnW`ib)r!BOf&&_A`J!TxZcEQ^^9+9HWhc@Y|
zN38m7%W094az_<t=QyfwOY>j>Cjiy6;B<JeBbM!+l7|7ipA^<VE;^>G;1NiNIjVT7
zK}c{s^-l$kc&385t`CEQRwTTTe)3b>G_m2E2Y@Oz&vfN0jFXCJ+hVoB%yXLHaG0b7
zc&96EJp)B@LA!uCBD)~BTvE7R=@$ph2D}9MTmJy5HQVaoN04jDV_<m4HS3@KOJn~4
z%EZC&Ysk@X)$e^L{@N#n^`HBx7P>NpxKc<XJyzRF)MbKr_8*T}tTvW0bchaeYgrwX
z>cNnoy6Dfdg_^vZ)Qnx?M&+@^PXrRXta%}GRJtwQ$c1Ao$n#g0SEXeFRXqJwW_3aL
z77)T0a4t_ZQF&_;+<WpI<M@iuyzt2oJFr0WRn9GCd^@Sf=86_p`T^IjSbBJf@(JVp
ztH<?l-udy(de5r08%QiYfr5G!<$Au^Cnv>d#rsV-l|%tS+sUIKNQePWIH8f5Fm~jA
zYE)!w_;1BAXh8q~7-R~TDF)>LU{r8{?R<_%@lS~FWoE*FdZ+M0{sAMZco?GEDA9e#
z<WRQznjlrU0=i<#xZGNbAso0e;xobVO(m7K=Kxiwr}SAhY04u6E6z=5*Yx&TPUe{B
z@mVwLaNw5Nvgl!))ueg-<8QWb=K%Ft*%uB#<X5su`e<Zbrc;U*Pty31{G@8fFILlk
z$hB!Q9!R-&+D`(K_7;*XuZs0|{X4Udjm=FSj<bPqa(}v}Uaf<{WOQ?9nfe|nAJkUr
zA7KtF>0K=`8lKJtdES}ny9h5*NaG5?=QyuP(-RRW2ccXKRn<pPC^(z+xLzo<!`En2
znvOnPQ{)6E0-J8EPxV~9^4%!K8lb5*6>7itY5mu%=nMY<Yacw~yrR?p0J=>70PYp*
zItU)bdHSwat=42%{{Ufdql!#(&31Xfr3W>FTU@B8kq-vDjMq1OS7VAF%7QV$JyTPI
z%`1i-Q%U~-npIJ1RmjVgL2AL0kxptDpTVh<s_*IZ`_$2zQQ9ML)THOC=N!`UkIiUT
zZ3-aIRQ{M6V)xBB@_6cuUj$-Fg4wPBfsx&F4rxyV8LOomow0lkVowIR;~1p;n&a11
zU*sZZlSpY-IVTm&MJ;@6K1AY?d#=GW&`xPwnrJjOVU!|-109MnxfDFhgPP@fmMzsk
zqedBcB5n^fo%4$0-6;nd@m!evnWLdR(?}Voc*&+?Cp2aBL_9DH*rpc9r-cAy^+-ba
z6>ir`-2%}3<O)NaR}bPT$N1EeZHR`BzRhvtkxBppnol&Vbp+rGQL!mRz@cIUw;fSg
zIIiENVo5o^%-Q9995b3nq{+=Ao-45z@hI{rrjv%H#}v>8>Sxj-8psLUPAjv;J^H3#
zVv8XeJkwjO=DWA0f2yq{&`-^Ki5QQUIiOp1P6rfJo12Cp;-HryaBJx89(<onD&(U!
zDr>N+a1Ll&Evq0VjbIBLin2V!0AS!8(=ZB=L`xq9zwt>PP5?}rfdvWHO)IDw$0HRf
zA0ioWX_@$yK52U)UndRLDW~~$Q{5p115qIPWY%`M2$;u;MjUZZDqPa4cR(a#IHI6Z
z%TTB+NU35hr*DdQK1jupNIdmX@*AoouHbp7m=!f>2-u2Yz$T?-CAp}bPHHwnCK;vx
z4Ng}z5JS5kbxgudV{X?M3lfZyYt;Z1LF&G}3F-E^xQ5mw&mgg^r=&Ed1Wd`7Z{uG?
z{+W7sSXeF0sfk=<X0~ZCZg$GDPLZn6>?5(Uirohav_z|h?uv<UxfIKflYE9TT}Li>
zGte`Zj9_$Vb|m#v$mARf7Di-~k~yY><P0SwkRV!Ix`UO;G%1rV5I6hLF*zh09tJAj
zg&eNA0~j<>kb8Z9y$<oJuJ0zF?G=l0ZVd@j=qrW?ER8JNK=VgJryTgGVTdba4r+3w
zFd*mgT>UPG+s!P}e<>TY>o?+#7_Jd3x5ERfY%PP3DO52B3r$uEcsz>fD(J#GSvN5(
zIW(KHyi_hrG5e@4ZWh+n=b5&u44lwuCCF8>qt_bLdOnuqH~Cy(3h>*@uiegGZ<gXo
zCa(Q!txK%x&$Z9AhYBk;LkA=3xVoILEWZZ4aoUBN0JiLL@$p&9i1#rFdo`yoxpoCO
zBbozG(j>gp4gnE^&19QVR?DLoCrBSj((bNq6>)^}Ykj3@>2DgWXc_+is_j0_VG9f^
z*JidJpVPs>NcYNo{Z|iNu8dbFdqe5+llZG;ZKj_lRh>>xiq>d8oSNZ%$wt)r`>VYd
zM~>W<xdE8<_^l1Pm0yMc73VT!o_BJ5eIAn*OnOM|#I9d|{{Ra6T{;UyP>_w^b!i>D
zJ+Xbj1@S^$+$tQY+|<Y~#_g}19{#F`R3z>2$vk;-#Y(M(4kO*-q>p2Pf-1gCu&lrx
z%4lOWy^Mva?x_ApS}Ek^@Lf#kqaB)bE{z^M)j_#@1D+~Ndy=wbo(%}*`2pyj+AU*M
zZmF^$4S+c{3P_92>}IEw?Lg{y`>WLL7+yCS4?&y`>G%q9)j@7cDPVGG?9v#HN61w7
zIOPLk-BvzT&mJl<7q~X|DY3C54BY)v`{5-6#VoSIE9Cee+MUwut<-djX;`Bu$^QUX
z)m0lAw7-(MDl17a-o$`^6xKy_A%=eQRZkpc@<`E-Q<4cqyFeONs};crU*sz0H(YXa
zS!>NK!Eo0$xzA(@J4MwLp@wm`Pt9PIX`NK#INW+K#G8IN?wss!KB!e<-1w;?XKaqe
z1t$0dhmpjoehw+>rOpAUfCZ_*73kE9=b$t(bfE0atCQI@$t(~K@m7{sPZUH9Wc|>S
z$bG^yn$;<&!8FPv8A?qw0&E9^#W(gG@Mz<YZ@y^=$0N;D^31c_h#Yndax+bP92{n>
z5=;a3DZbz=bKM7-*I-zc^aJ}YF`AKd^23Ls*~Z*sCX;qEgNi?>lI$MJU{7LIvAZ<G
zV$5@%s>Cp1(f;YmpyLLIyE$|pZ6dXF=4>3-Hk`onYR*BPn-tWL0fy%l9%)?zV(hOv
za3hc^Hqve1wHd2PhDP9VpA^-=0OEtqI~hR(*;=inTNk2gb~Vo1j~rKZ;;EC|`z%8b
z9D_;;BDm#zV>Q_zW|1mm4p1@OC_+zsQwB{v0Tj!zUjl$R1k*$;z_D&A>bV)*4Jq6g
zX*A9|yB6B$YK@CXayc}Fv8VPio+!pCDnSsk=9|Se@EZb@XNp^P8a`hC0P0P4`mQoO
zit<KW4A-y!0QD-M`l=v5_Scg<M5yDM_x_Rp0BaM%de{Bbf^^feRFO_dJy)UVY;Ir$
z_~N|g$r&6I)p|#yWLaTF$HjX!)1QWLGsfaxTHCZ`YRU&sa{vZvv4B~%zXRop&otsK
zKz0V~Rcy(QEpT2uX)Hu-!Nqz0uQlDCqauk}aCqjuB^D-#I5md#J*k&cB@~=~De*OY
znlVMUOw8&MO*?LqQpTyZ0k;J5YfSoG<nn)t+C%*c664c)%<fh@8BRRnu9iq8*{x%j
zgl7HC;4X1a*cTgE=gn&^JqfJcMy5L%Rs51F&e!zM)J}H`cH_P(kJVyNnGUr1AA=PF
z<-i7}HpN)v<ks@n^sir<Kryg=;;n6eNi}?8N!Opc4_k$QA(odb@L=sE{{RXgI6i85
z-Ghy*oQm}wKlGO7HTle}aot+!{{W=Hqc1G##C=jfP{Z~z(d6=Z=b<LW!HxKKJR0@S
zOxj$@FplOX5fQ^;q0@g&tZe34Rr2a{UW2B{*4F6&Sja()kz7qo@#f0p`Zr_4C_;)k
zbY={Fw<UaWOidC=2@FMOC7wrf{6>C-4-yA1S;%D>?yyfy2|f#wl3baK{Rl^#)gA7N
z3Z83ccKe?UryfmG>YGFV0Em-SEgJJ8#LR=~W+&aaU{%$Q1WH9#YR|S+R$?-K>dwJO
zvH<H9mtBk7#;CU+EV2GcT^itEQbGw_8kAvFoKj<INlXuiLfe1$QT^@}>lz<6@&5oq
zyqe8}T_%6`3iX`~kK4zOp<FF<Pbz4;_?G}ufGMlaYk@V5(&VJ(?w34blh1Uc8RnvV
zkZT`}MU$G6PDd3P?uJX_k>YkRbDAn4l;F_Jc*ZJAc?kH{-3;T8B|jy3Q)r!+9&3Q$
z(lc|-Ata6~wYn<I^v+SwbmDPLQ<F;BCz=qF+h~n)NVq)IWS!LW!qZgNrnux(&d=R0
zK<raryoyf8HBy~A4GIt9n#=*}kayj4@y;n6+l_{(QrvY>XOm9un~r#@=O-!|Ay4;A
zopPC{bxVw)`lQK?WQ&UmbnFBSwhcUxgpS2E#u$ogaz5+LD}#S#-o?N?=9r@`!Kdx`
z<Qig(zB7srFLG@L$F?!uH?jDoky%eYQ%4l@RW5h}=x`hiRGV_SCXq<l`J^Wzi`y5^
zBJ#yJ`=nt?F^*|tLec;a6>cbQb@nTC5fWhs9n;9-xq-$7cVfL(9BUkQm}KqQ9tnDL
zubNInQn5JZo-tHIhgBKwo=+L34qBbHXb49q6rL$uWK(nT!KqHdGyPT1{BukL<x0bf
zNWKF5KQ<H~VoBgo(;dt}6hX&eQ&<AI0~Pd!k30<@#eq4iL?Fhe%?~I;3WZF!lT$*D
z(F_i#!{~=fpo35-WMDzTqrI>b<@HfXZUlS7Wc^dLKqgn*S&nmDAwSC?(Q6U*fg_5D
z;5Ix`gc9m9$94q_6c9(6vb>M{e;$nm8ihs#WSXLCm{Abh*P7uG!8xZaXX>4_lYl7#
zL^2Npns)54`=<~K9&w6j19+&r06+k*=9uca;M3KzdFqxxtauo!z(tIl{ZuY;PZc`$
zR+^wIoQ#Tuq7@k#qV!u9y1W@50CH)IAG+aGV>Pz)o{-W@6}(B8ax+0i$)ro9ZT|pF
zG!Ne1q^_Uhi>^2|?l#aYajGT&1Ar?%==~-=Gf$2bQMisxX`)lmJlA3n*%XYe+p=Ax
z4(My97y^kbpnQ%<pmhgP9__-4T=Gi;nXu<3g1Whl6ugcVN7<n7HCSSlp<!5mR_jf1
zc^#w@9i;P$e3@g8HpF&o<Mkbi$ia!o>+@FHPO%Nw5s(S+YtLe^X{3Y$k~uYWk&8){
z_iP|nIzFK--!6nro5d_~lbW5FGLBCkDz8SlS?u>S^G7QJzsnfvy3wg}hOnHWT&bjw
zB;?g@t$emNp5uQk@F=}1;jS%GVlZ39P02mJNqmOL%m`jr`>5p^Y%ViX$l~A){1L@N
zAOMgC2<ZLLOkx(w=fM@{zo~lj`%R}>xZRAuHK+BCw7O22B&<ieN5p;CgzB24*LP1l
zIo$2dW7X!RwHIQKk|mL{+<B#nHj$Vy0C=J2a!Tzvr@=7YuzxdL*3BW-aW?IWGIQp%
zHde|MBuR#D@m0E9QX=L-$*sSow8J9_7;<_Q#q|wRinr1_`W-d@0A-J-^Z?KT*e>7r
zt*(uxLt+<T1-z48qgbt~jsF13i@B5Alz0{7^*P+9zO3}RXvsn1PJ@spNI0lWqB(YM
z?uC}*Z49S4p_)&5i}Y(ewF#SW=p`rkCA?CS9j74F0Q@j}rxrUhPHG8QjzFp={*JZ?
zQri3uR^;NI8JRJj1xBt4;~1x44yZ0VE^D4eEX)F*6!_bLpZBO3^PJO^?jQp~`2)vj
zEv6U%5=9a3yTTJEiU$D&OAN(W5zP#8aBkUNOfiyFf^F{F2MTx<Vkh3Av%#vHxe{^+
zO|<K2%CEM^BD6y<i?nm|X!ot@Qzy7*Np}N~YDunR5vlE{Vnm9;jxp5_8a8lFX=Lli
z47L~);<t8-crJaw4*(A8WOQtW!6vE0GMomfZDlCn=f3LbX7J3J^c~72i^fMnsdbGt
z@-D509C#If{{X2FYHIV5oR6w(m{ipnFlr};R*Kyk=2;^NC?JjvTRe)XvNQdfrPhqL
zsqaL%AL_+Rr(Gq)D$3bDYZgfCv~_UL-dnsDaX0|ux>NQ|M!YY0r!Uc}d=@px;7I2t
zogNNLcTH21_f9L$YA>_~E^={AA?ZdbUDdD!H)_}hup^#n1bpYsI!E1rIuydpc&gfs
z+X}Jn2*Ki-Mo)M(z+?ScrQrODrme(W9J{&|Bw(&hM2tb>1M^Z4c5f^(M;t4pET7od
zZO3S)@D2?&M^Hx7oYJujM?BK|b8>Wvz6-WFh;GjmpIp>V<y&bS)RFFN{BeqPgDn35
zh>uZ<E14sS`C^q@o@yx>f#GvaFD5^Y6{62~MQ>Yye;|U}o8&31!#K@By^Lh^Xn2-T
z`<ed$IIF)ynXZce0MsxqC6bYh*CS^kh^Q4>P+NW!c%<4Fw$6N2i_@~Ywo@-t!{8`w
z@Nr1YySZ^tuq6f>nq!M_EcJ5q=k`?nRv$qO$-Clre^k>V0i2OT+OUR4a5A+Vv&SPJ
zv&}m6==o)D)nAB(86zif$uy#LL_3t`gj<wX8%JI$d9IRf)F{O~eLouJ%H`^rK93#!
zQ}hm_>c3FOb!~wV;O3~$=w-=8VQ@3&n)g<j6|exVYo%oh6ocf~PX<}xf`nzlIhu;k
zH#sg@;?{qn2!I*x4nMlpXn&^qewh)BOOSgew$n^^XK^b`a?(f2ns=+p_;M+FY;AN?
z+j>JzRVUoYr9O<&$N3p^{?%&|v|ClO7Ntg!<i>s!k38RO5z}LjQ31Bl^vKvM#uyL9
zPbPz+iMN|L%YH=^RD#7mCWliaxeV04<i5)mINhW<H0>rM{yR8<{;FYXW0A7nHT%G%
z_E|@HG``Dcc5#YVCOsgiqJ|aBaV8Jqxu#Ue186^rY)IvDTY*o)$eRlB_fo(hnN~2W
zb4?r0aZ3~m4|G`CRD5Y_!@>u*Gdhr?p;Ow((8By@G?B9j36Fu%sW-0*c^+t>K0#$N
zC@!jSeyJh%NaSNRV5+V{MtC%aFd3Bp041SJmJF)sr=w62Cqsg0!T<#KUA0{@PSPq>
z@Jphq>0ASmnx?*m9d{!T!?9W^)U%cc2nA-fM3?-IryzY%ss-7X)}u$b3!Yb+w$ji%
z3(tzA)f;4)Jd;;iHhqOZis$-;IL7|VarrD&cJWC*>|j&a?Of7-B>YxcQ;UiU?!>Vl
z_c4S20JvAJXh=j!{R;An9xjtU8og^mIfgv_3gBuI+>`9OT^5-llR2es&zeR4t2FI8
zDMI<BJyIU&Pjoq5kYjaC7z`=w0j81x>WYmkVN6FNkq!<)t{@SUOjXNbl2ekE(CI1+
zIAsQt{i-NVA89m4*cIQwHIf$lGD&T8H9)5>0p^&5?!`&PYs7?*k}=N|)h9L0-6E1Y
zqmp+bCBUSdQjy1W)7EobbBbH+A}{i#aw&=qNu)XYrHr21qBY3=2;!O#itON04tt>A
zC?XM)PUvEh`r^29xu=a=Kwy^(^36ot3NA&;oC+F6V7rf1=k>T8X5^P<%u}^PRN|YT
zo<&I>G0ijpUa>ejO)|?li9MQF+a5Dq>Hz%LDfrcqjJMpo18Rq}Zs=l@I24hd>DzKM
zO(!Wz*JAL7gPJ1UdCfx_^N#4Gz!<KVrs3T^^6*odRH%D!DHp2e#@^|G<ab_z`7(Go
zjGE>qkZ@^aBN?Eis5Bt|0L3%?RP2B$go4y0`U(--ifAB;bM@6U5!Et(E=k~ToYFbs
zySAj07~-e#;2uxSvA1q9&x)0{M-T;a-P5ghm1Qb79;t*3;g2G|kk39jWsRa_+lD<F
zBS{|Nc=Oc+S0BXW8Y@aBImriuMhlV#O%zKBY~_LKhiIIsC<)CRw(9%YkLH^IRa^pd
z;;f<->+S{E!x-^SOLbwrv5%@Y;SevmpPB4X@Wk>yM?#jYgD#;9Z@X^?s-|EW@wDc&
zw~SgPLYPtJsOF5LmmkKe$byLCww7VFc&lAILPn4ocNPBtRw;cxyWF<ipB1mRi5dk!
zn_Sf_rzS@pjP>`YZ6uF%qOzaOX7BX*tfV4TBlBLr9-VLdRc0hV(5i1yX|P^ev|)a0
z(WNWLo=N<VHiSfCJB|;EfsPvhRP)4b?wT1F8%;*c3w6a>xkj;@j(8Q!4|UxW%@?P@
z*AoRJieLxm76q70<3B>Zzx2P;{?d!-<M|)r2&_+`G&6sH67By05)e&$&Xo*y*2x;E
z3ZnwI=`h#Ll;xLz`+IUD40xjvqg5F^(7yiw`0%Epc@Er;aa$U1va)OIh~tdo`Bp#F
zI?0AM0jld#m86ZCNZ?nPdb07vthqQ8X;&aC^?h1dCnZT0Pi=5pQ&)BepETK~R~-KU
zDgtR@TeyL3qdc1D>fEVUM0}&vv}r9EtfpKxIwq#o3JE>IGUFXqV@ZZNZc$>Gk+dG_
z^HQ~r-qR)bx#$|^KFZSYSbBd|5y`cVRMxuAaWKjrE6QypcxD^8$?CWI9nRZBD;xkb
zUW-Vl9BX+9{ZTo%zlH!t2V?VB`g624v8(ZcS!*S0{YB+r(_|XcFlO4kReEofr8Y&`
zXp#mix06iNE<L1aCLtCvlTq9J6c6<Z^FP-QSfz!L(=BEH06d?_gVkG~BE=gmf^u|W
ze^Py7`lqN&DE|QV(HsmK%{9SdO7ab6W|^8|<%uNZ)s3ymXvR3MBb$@9Y)ZH+&B;&h
zt^GFxTu=Lk94}QupXHGhbCZhS=(b`TNaI!cH+7p)l}09v823oI(Jb2P;c|q5UYY5w
zF>TabN-ziU6`k~MlQPV*AK{K`dny#S8%F?Fj@Ifu$^H@Rba;KVsrE~IG;0`SKPs%a
zo#g{H8%)i?9M^2y{JhsACQZ+S(P?yx8o72pEP=&Hq^ZcqJ}I%_FFjO`wBU-SF{fs^
zK12!6$na~XMI8<+w`71#cgVLOl@#=-3A?f+L+)U8UAPA;PHeF|coi94j2IlBG%AsL
zdxhDMK|dqnk`PqkK_pihGIuX^JTfAoIOK{6CdX~0E_W{@7156K(53j8vdxNwvBANn
zX?YSnlPduDG=%*4Jku@C`YYOu0ncN)t~lv&(Iq6GA`X0-Ym<)fYl&rS4nVF_9P>`y
z^i-DCJ|-J^?xYX5<3h^&ALGq(xpAD-QfW}r#HGn+X?6aF&plMINYTtgbbjio9H|ZU
zL^O`ra(}9_IhoJPqm8~eWzEbfJ-J5!h82|x`bMYiH~yP)4Qe#`43PnhkwSW+z+CCZ
zKtIbLITe0fTjRYLG}*n3p4_@(!*v^hmKmv*UP%C&&FL3OEMa8fmxEdu=3it4#aW9*
z3B^jp)cTH0Q*hc&B`fkPP7<>Y6yl&sm75KaDTd^SD4^n>^jOiE`kq(e1QvfBR|dmz
z%?mBT`{tM@J9C^GD@TXuN9q~hkn@}pa0Nsi$lM6ULbnXbf(gwsS76vdg~eF3xT~VC
z*5|+6PnKX$xB;l!aL@TOP&2VOQIk<fCBW=d{XQl9K=qlQ@+M&?nRqlxv}+h$%uk+b
zw=$p`*ECFM%t7j`hB!y0S3KDz+YI;6fS`3;(<<~o-lLKgRSc(^jABLGR@D^SVLtLV
z%YsEK+_?i76%1&IxcjEetAU3<)j$vT8#}Sb-9_&fBpl$>s6dZkIi|3T01ZHqtFhp8
zDa0}Oj_J|uKv;A4Q7mNKd*0#O^-(2Ug4m*WjFiPSi?PmeQZk2e#tHLID!X=r^82aC
zY(#fN$!<K-DfeXKfl;`SaCsD>O@kPzehFSe3{R4wxHL1`@=okxrD^cIXC|F&8bu`b
ze(2I{TWEm9J+N{zDf2`<utRf9kz|TO8yK$M&A{rV1ZBDn{{S;YA&EC+0xBtE<Ko=n
zqK#i~K4>lg`6Z2F1o4n-nnf5<j0%w<+W}%l4<s$^?_6UvN#uECl=3r=sz+u(FsC1M
z>r~o9ANQiBXq_?9`lywGx&-%-1LV|kI)Kbn=fyoDja`XqoBU-13{a*UL{UxUyQllC
z+#abIBXXeNcZzY6F@*qTr4X7tv_NzPEPdp4O0E7qqZJEpgOV~0FsLO$kK;=w<pT%0
z87B;IqZz45b|7Mvu+T(2j8o@8>OHZ7C_u#<9MtynHsYXhQrASHTQBk4#k}#JXuI%&
zW_XAo_Gl}TC2x-h6vX!rz~s@o;1zDiIRpR!rpn_Taw=_rV9@fT7j3xelz{hBksrxF
z6;CT|_b|YN-fG(E$d7ITAXN>^jgAPxG|7^TzPgHT0Slgb6=|iMLb({Abr)8>U89OR
z7WjdTRv%Db?WVrR#QC72+ne!EiL>#c<71o(ksE8*7aZiLax^rmgDjLA$o~Mk^?eKh
z3S;WLrpKGe&y3fs=wN$E^@_o(ih0r_e*(pOG@0hO{{SP3Kn-G?owQVrCX^q#LMy2q
z*iP&dO(zti>YC#Rx_f(Ui?K%|G|c4Yqy%Fat_hmAW0F+Kb=etiLkcc4l>|_3yH6&F
zw<B%|t$GPcIU1LdGOJdJ7>w{MnYg6JTDi_K#dX!IY@qKQQ&+Q!lY+Fx`>5irFj>wx
z?wSe3EQd6MIL#!g_!)E}_fCA{flXiKlTU1TqYh)(A7en`nxmR{=9$PfCBs2NNIJzT
zf)<=qo_MbD#XMl{=ojFNoRLFJjk_7*iWtvy$<%G#n#HTY@?!pramu6$KQ!DN@l$P5
zaraFtgNouj6JOD*av0yl8sJVT%$ObVOk=cB_T-6Z<R|K+=NYCTH7-6hu(rjJ=n%7K
zG(iJ2kR}t4H1G9Sr_kZ{u`BJEvP*P^KO(peDea7PO;O^uZC7P>Gz^;O7!;?xQa1vZ
zYM`7KieUqHRP{V}Od~95>2X9A@JBTn1I0c06wDvGHEe@V2AJcF(}~HXVaGHdR73N6
zLPsmZ3Q-^UeZbjc*ryogh~d}2x@3yk00IH8q@H}~BaveTh5_oj*szcQaZZ@KC_>yX
zRn~BmFY_FK=7Qmn(Ii>547{9FNTE*RxD_R|$}O>iIHZlFa6;oAD)s^vRhmuM$Ks+_
zXxy$aGgHW9jj^``)FmW&0ddA@SVCQ+dvb~aBB(FaM(6Y5uB23h<-0lRs&xh23pZz~
zry?ROy*VUssOYgZuZ~Qpl&%2sE6rh&NI-4)73dz3adl$BoH0HPXK9pXjdEIk;k|<9
zTye(~1at0??|sD8#oRGl+H#!YvsY4kYSJp@ya7pOYD~}i-%pMi66IKKe(Ny&3}&x2
z8!cl{a_yE-coZnmbtfQuEuZS14})|XN}DSY3y=pS^;*AA>F#~F)pH}7ZiDH&lW+~p
zWk5X<UWMtcGSbrdWR*|Guf~+5lVF}$#MpW>O<gL%11R3ZCba2rbH@UyVZO9w8I&$M
z28xRA-PDDdN&f4-4Yf$f<$RYY<nj&$AAI)d5*SW$y#D}oLvHF$H|PE8ce`Xm3aG*I
zXig2)1~LS_a#g@M&3X5#EoB>y)6bgSU);lNyXSB#8R~6SZe>(~M#kecR(Zk-*i)k%
z(47AORZpv~SQQ6wt&OF+t-~z1U`L8&-j5qFnl$4)16-V%o!lZUfS!xGTxLm1xba#$
z>zOtQT$W!I2A!u`#M{aM_~!zw1ae*BLa(zOWN(`m{)*TbHNwp(+NYsf71UhY2x43V
z#bkB6!~3wQfFt9Ix6&q)_G>c_i1S-syCa6wFFsADrVxl)MjVbR>f9EXZ8cHpiQ<yb
zm0)*wSFtoO#%7S=RAid#VW{Isc%UCv>o8~<B-fL$4ixPln)y@Jdd%9+tu^efTXP%_
zs@eXxdeOC=O|<)G{{Xj$<Ad3+Eol$?S0h)bc_&^Gw-h#^?)5di>=*)gp&7GJLN@W;
zW3tn0=-683^fCD=^Ioy(YcqFwlYpsMAJbbsx?|4clht~+qwMh{OpWp<kzCJE%eE&|
z)A2dWgL~=gXy9If&lGLx_VN$$`k|yzZpg|`G0iQ+p7Lc_apJt5Sw4?jhfesHh6Gfr
z?%R*5bH4uo<=sk1F@i|Qs2R65D<q)1JGh%_nIeA@39j3aPH|1&j4nUkopMU5xFZII
zalvJCLk@eX?Va$-r-tsK18^8>kpmzs4_(s5G{%)xF=Z!i6k`+|tWf5qXUHTsH4KZK
z6H&3T$7$B?cWoYOQcg%U2#=hviXt>DIW(#^TI7($-tEr<r*$eZPK^|y9aK|}loLj8
zY8F2xIu;8j0~w;d)lq`E%><yT^W81H#b1${iSP-*GI=G~e~$){TR$vPNb4Zyih?ON
zIaA2-Rqt0pj4nL{oN{qcz;?0S=AKSQDme&Vs>Jx<iFgV-Z~*G3iGqQe5(h%Onk7gn
z#TcfgWm#gHrX6K5j!cH&_^U?Dkd-4LhAM_Xtkt%kF!uHnk}F;*+S<+@vjSf<jCI|!
zT57VzouQ6JSn16mZN>SbJzHebGF>O~UhzO$NwAIG`K>xs<8e9q>BlOJ*M3Xr5KM%F
z&yiC@5Wy}n^+IV7&2e<507%bOub)PmYhZpMlyzMQ<8oGxMDi%QW@Fh`3~|>+qBgl1
zEk(?a0ce}BX|SgAfZWx4N8B<bRN!-pM=PI@;+=H^smixB#f^gk$2BOHI7YDyf&99u
z4ZiF)MolC9kITnZ0CVmik;NVX$oq)6<B?5y5h4Q^IH@-;0ZwW%QQV9Q42WzK<N!uj
z%};QEF6h2Ibx?q)+d&lck@1EcQAA+NaChw>gHkH+OLA!%Xw1L_=kA<cut^vKg$Bn=
z$gqNg4d$5|?f|#^)N!$p{wh+<xB$EgJ%gf00Y(>}x{V=sT!Zn?RUEGQ-5Kw(L=eU`
z*xcihPy_b>RZ4@8H2(nb2l?QGRr33*CJE%zqm7PCjhZQviEd13CfM65X*b4$EHY{d
z({hjx$aPR4l#s3tKIk?wTn5}otOIS}bxe-r909--)R8}ds7zUtiVyHbR26j~lN)=X
z9%7CY9n-)F*f}(^h~{7yu~FF-A)-OQyqcav@dQw@Y>Gqf5BI5!!sj4xD4;SWoxQeJ
z&wbRk;#W`pJ}I(1QZPGs`W4G`f=_KhS|k?a%O4>B0JT2SP+?&tXD6dgxs{6q3%mNI
zj#g{{m?N=KCt&Ry@fJ*Hnux_Z6<&6qHD_;XX8CRd@kGyKJ46?ZFI0WD#d#9o3e5XR
zV_}fh1n`1#gQ{)9-NgR@D9Hork{M)acWohj=7ahQ`y{h-?*)I6`l9!#Vx@oDfR;8c
zWK4oOuC;kR&z5sS{06**3v}FHb_hHQnDA9gcTC{7$nt5oL2|fb#Y#vP!U$6t2N?#D
zSSZdZkc8X4o6jPhAc@=Ha4IxJJy;S7{{SkJdq&v<njRDpO`POZ%{Fo|$K3#mW*Ovj
zQA$f?lw&m@ggXO~NjM009~3($0CPkPMT=mH&1xmuW-2fTn$q1X1>pBox{`njanG8S
zVY4N6!Edp<HDv}$gH-p}hWu?gIjf6S+@RKNYP><m`@$jzJyAgvjMNG<)e@XyxiF2I
z7)G3@_fe2I2E9v3ll(D{#d)5cfpVYSdRCHvMvQvJa=k%HITq0H;#z}T8=7e7;*|wq
zad*K{#fE9w1aV9}8eHI1O0LO6fZ)__q@Jo+`5aQ7I~A)Q2+ktzX^d1mX;oMXcI;Oc
z$5pO4amSnJTWpS4w~$GsR4RL?vr~5(jyRV$$W@b~`K4v^Oo6v#8j~F0*JBfe@$hGr
zrf^Y?X`z2K^NdmqRhwX)3&|U=Am)@~70=x$993T(&W7+uj5k!{oON6~u3~dlrjqC)
zD6R|5ICV`2B=c8pN`*L%d#>3?#=AKc7Da57Ln5(njX0nww4Q0p%Tl#2?a&WQ5CX4^
zQa60}PMt_>if(@E&T8i<J(g>SJe+wH#sK%tIFX9)10t~}C(V!0fE;8F1xX*xfHcy;
z3{<w`{Z+3^kA77j{>tX*1fas=qd&@?4;0M&tJXA<NW>#`NDVoTc%;YXg>pd&IqJE{
z#sw#y>8;0_TXYdX%{1_Hns7W*Pjt(gAlEn@(r1cuj11FwJ<(R5Bpcl_Tw<3$DoHUy
z?j%1r9$6)n85o7BWMJ3`bBbsJRU`P~l_GQ=;_{=6*V23*e2U;4kI!_(aEuCOrE|O%
zC!YeN!yX4hfejUqS?s`5{nU=8D8U?Js<c^T5^V#DwUjfVCxAMu*DHb!yF&1+fQ`et
z4ojvaApm%;c6USaJXBdj7z{9e=rRtUY<O9D&qPpHsqx?1(To=u&ir|ym*+56921JJ
zT@uQ9ce`K$0Ie^ibr0Ifw6+C0JuzB0J4Qh2to=Kr*~tT$hskRtvYE{^U1}H2co89N
z@yV#IEm}}iC_j@>*;+BUF~oMn4=3)lixf8Y>w*UzQqh)6o(g+N#7@!e+(+W3ir{|Z
z8yAWq5AmzM-20pgdRah7Cx$)I0W{avK*k+*u^gOH+EUt0D>t|g9CK8c_JGD?Cyxez
z(M+&iNo>*;VbRS&yGGbU9_AQQzj>xSXr~cK0sI;P8I}f5aF!y1Z9*GG32+z_{;FuX
zbeRkFmUvV$&PmT4(YLzVTG;!OH~BGDnrznc+lfcXK+R#jS*uBPtgXBxga-sx{4&dp
z-1;PAY_-*Oo1H@G5V;Yt;;bizC9o>oI9#bUL2n-SUx~sIgI4~il27_fFAzB$;<4w-
z=PP4&!1U{fc|di>4l`GmF4C1)mjP<4Xc{~+qTrlnw3=GR)?wN|mMWnN$-^tCCuuwp
zRu>NO>Bcy)fR5-$t>n4AX&DLIQr^ck!=yV%P<x<GnMZqSgHHF@<@3p_>ltm2r<ZOH
zMKe>=(XG9(J~*m>PBRJRvu;zR6+7T>$$z3=e$rUVwh@j9r2Sa+oLWwVY^|go^y`if
zB7%D>mAnvxwQ;(<%k@6e>-VPOSxkxJ@mis)aKzwQg<SbDS5v&xJdVV(9Fl4hX&C)e
zd#sjk91k@t8EI8VlZt|i(o=q;0{yt5VMn(5GSnuSYZfuM4A-doZo%!Z1f!VoSU*c5
zD7OqaRqDN0LklLH2yA^<ob_2ZDopjBhmEA?UxLcui6PE8s`mIc0q%;)KjS=Ngq28;
zwsVT$<X6v=)oG!%IQS%$3}_1;4Jl~jk1D&Ej_Crt(c>IaLN=15WD2fdglUiEj0C#l
z$)**2?Q@)XshFzr7Y3r-eZbh}m8BX*(QUC}IBa!_n&_{xHn|LZR96f(?*9N)42)cY
z)Ux0zJGmuXa1A~{SYsHd(=s*(7^xXtu;;3+aDy(z17c*;l#%@!hT>I?K4%qIuWN%v
z(?+1s8RG|#D^4sk%fwNSQLD#+ZksRE5P7a-W^9hD2dDn8Xfw>IacW{-i9RcDrt3Op
zv2G{Q?h-}EAR4<agDy34=);57@MV=Q7_y@W2RWp>k#M^gZyeObmOWt9Qz?ImtZmY0
z?8T`x$d*mSRE-cU8ELH=EMsZhM|AsX11>z(J5rUh+jVG^mB0tHT<WC`25A=su*Q@e
zHUX!6a#!YvU`#P5@2WZ=IOl^*Skw)~(t@CH)YnUrN~O?BC=M`ts~cN=r+Trfm`~4l
z)mzw1&V_u{+J0He^#v%#(I%#f_C$HCtFoko5zT7nJE3jd;8s@UllKSfHPFQ`ogQ;6
zel}ZZdV^WWtc3n+Z3e#!LAbZbiosaJ8x`8)CbSnea9i3eizqHfBD#3zD6S4>Si(<{
zm-3{J(`S-;rfJcv(%`nr9N~cRQ?ZO9g@2dgs=jQpsZh6JQeukRvE4x-Y-KyAp<j1v
z{{YPsGz}80%svK%Uqrg>uXz}-QVwZ7mUh56scr#R_W-~&#gs@e6z=gtxef3vYwV!~
zNn_@@QB;N;{nDAAI|}eB2bhOB>W>6g$=Okc;xK$vDoAn}xWzRk_b{M;y*$T|K|Ij@
zh&cp%axhIS<8&lv8K9SZZhX}7vt*ZV_NbUz7_1>dnkA(R7r1e>)Hh)yL9lHgR4~BP
zCenBnIwyRxN0vCo^*JZHY!|8=wrRy91KVLkv=N5kuqaz60qoxL89?r!X&xp4!T$hy
zNG^nN`%7JOH|~KYKp<k7fhI*I!!(#tkLsn8a?u6aIP*=n_F{wli}KGEBAAPABmgQa
zBWM$I4l&J354guHaZQ!|&ST@n4evJcj7T)S4cJ7A@X7BM+`dIJ88=Qy_~xOK>nbDe
zkb8iJsL4T-L|k{0Y8XegFj+YtRLL$GKmoWk)e_0hGJfiGL|MAPjtY^E>J*AQo3fFf
zah%k4Htlj+CT_gdtoBJ@@D$*(@@mD^iRQeKX)wss24%=oj%w0<HCF6N82GT}q|m*&
zJ=8edD%(?xLt?V1%`)Z%a&<BYboCbIMHnDgTSn5M9z|GbQVFdG7|5V)MArJ8fGH?x
zoRX%zh@#tR_&O+3Qel-qZ_m|3sA#HV-w%o^^K`R^h-JrMRF+dnBxq(G^;2|T3_E1G
zMn2{lpmUTZYB(f~p=AU7nXQPnj(J@Qf%W;StwTjI7Mc;qkyY(u?p-rJmpfRijzvnY
zCg-eB*KZn3emNqYZE&u3<7qUWXtVM<Q0X8b<!FfZw$=oX#X={HIgIyHObjujh4{Br
zC?RD-ryMm#%F1y~N%9w9sgyjj07Xa0IwXa5pdjXwf0`aY-jZ1aK*gGae2%AR;*VSd
zzDOU1C5~%5tjOTADB`pi1bcYLK5I3rJ4rE(cv?ro85t|~V|p9&S2j#`4Ed_vLIARd
zEr!ojeTtHx)@@?{0Ay@$<XOUS049o2oKP_5JkhbYAXh348Xbpe-~Gw~&o${fK|azC
z#d*G#ad@X5YtnSQduV?Z$MrSyh|8kZJo8+qn&mka$&A)|szoO`=DRr@3g;Zs@COvC
zGPX-ONmH6ulRQ!`T9Jk-*ve6l$DmSgk=f#!GHFR9VADr<qbWNgLbV|$Z6i`-(m4eo
z(KZj&6`YLbg8-gtXq%<YZO~?MdtZ{gvA$6^j_ISicZz5qitI`*(VZORR}QmC-YLM~
zP+YfU7&$br)gt3{bcgXuxQnp)I3K!i8KquDAw1%Px=e>QKI^F;szID_QE=#oE^}PN
zj_C*>{{VE4kxgw!lup^wE@Po^bBZkP*~JSeD%{sI)v<9I(RLYjgf-bGjMoE6o_Ve`
zX)etSP%_xf7^)Qjq3zQzlSH;Y>%HlCYA+>rWz8#ecil71Ei@i$t$d<^SYT2PtD2Mp
zMRLe9JeuYe*c+>)>XWp>a6cMmPVVW$g{a5!%||YSLdZ@jyyBKj8cvtGbnUQoGyPT4
zbzFPynK8hqokW$%`O7XupAG=0M&;0r;18-E7dub5a(q`ZsvMKueJJz7lE`wT98&L6
zbJyJ`x~tNIOksw};2Mm9`gG|W;O4I~SqlM_tej<cf<Eg(ZzBtEyNCzHSqX4LR6jC+
z0iU`W;eEMwa84>|VqlwtkMA@Zv5HOZcX*=N2uWm@1mIUGCgCu0N#dcumD*2;_=+P+
zw|OEibH^an86N`6glP1*rFPw%mF!X0>f-INyF+-Y??+EB?^z@{KbpIcGin45D><3)
z=spW&iqtS}bDk@qkz|t+XM@#p%CRQa#s_rmG^w53fWf-W61EU#fn0-vJboz}WDmGw
z0*1e}DE{bexbaLVqSU)c0Png{MZdg?LAU*0F<E=NhL2s3EWi_15l!z;khnxR%~f9k
zF24_z%@l`^Hf|D9@kUf*6kPT;*NBY!OMOs*6e6rdpN%w$t&@1%f3Fo3w!0D~jmiC`
zgF_rx<FQzQVpir`sT7xEfm>TWK^_tB5X0!x{{U-ca&VXg)|u*OHB5CXR8{(N6?<_M
zn@K)ue^tsYw5N@B;~r{O)7s69mhZ^%S^of8Y8kP0kPY~(l6FY;7oy72=v<B4PDNg6
zH%0A-{@^PI=}l?BV%DV&4@3&q=??bra-aI1O)w;=xv{%qU05pT%`{5J=i3YjpcZ-U
z<r|>fna*kQMrD)t5Vpgd;(-EnNo0oAzC$2ZD@ebcb?(MDR*O^p#MZL8=V0opeKVmo
z{4!pLD<T}?pByDO@$eM5HaEscjV|LE+A!6a^)8IM{96VTR-Z?_#hcsQAkHz(GeWX6
z-n?go%}jXr&uG}%Qnq=Y{{Wk~QNC<QsOIz?l6h+P1EECOUD=^0(5<0;&4&CM)bz4T
z_M5%Po*y@&ZHPM<gHTxdLVIma8Bv2GzD;`t<)xEO6vPk@s*X)JO$XeHX2|hWWYfwj
z_LfF*Y?sq2i0Y&R#w*dZm_4gSW;h3-UVEd)J<`k2YV=DMX{}>YF@at$)RpCT+3!6P
z<AaW$WUg99J3I=3RgQ9MaT@^4uY=VLX4#n7e6w7pzRyDsq!OcIfs;>E?5he6X}6U{
zIQum$Qf*+Qid9R2r55=TWM)59u*ka(;azhmE}(Z&EU|l`3BWy?N8J{}=;_I5vDGwe
zF(}6)vrM}sr3#<MyLza`YNyL~TU;t6g?7Ga(l*p)ErCfENXZl?tuT%W7wvIDMaiKu
z%WPXS7n-Ek`p@j(Y{rd-BE1dtSS8V;wYi8oy7CCDPoT8LlUdx$oz0dt?HXvZmHoN+
z+Au5F>2&eOi6ngE)OEAXn%UwS@98#^aL*(W>|^vethSlyk5GC$SXu2}Ni7+^N<ORU
z+4N}S$%WN{`>H)(N%r7xaxh2rR>}>kGDjj(Ow8C_3yV~^bt*{ah;-bW)Q)OP`!Q{$
zf)^P)P*#$Y5C@9$di)W^J*T?#ja0Q5cYO>1WRQ9KuC@S3NbZ><5hihpTV`P~$nLpL
z{2k@cOOj4cbmhRoAm)>6?gWg|gZ^9|sky3R6G}@*@g68ESCTu8t}~iF2g?GPG(&dq
zc^%WV<g7?dE(v!Kej<+3q{M@qRdV1*o!tFV7VcDnK%<IIGUUyv#-|30xrt;rUNPNa
zFC}->Bv|o)Yk3U0vUVVU6JC37ApZcU+%fV^bQk?d{!cofy{w;t(!xmw81ag@j(H~o
zc{r*xl}vjVx<0~lw1e2Lg|)%Qr1>pEf#-@;jY9*PfnD5Wj8`+TV8?}{V4%h@EUL|t
zqq>&=08tkRu|_c%>Z@{Pm<8lhjBZ;4x{6IM0OGHb#@5}gU1C2}NBt^QaN`4SiW5Y<
zYirGu43aoCpn#i+w+RN?x)4%UQIb-NGgqrCOu&GoaY90=yrX=LXgxz}>6zokG0kUT
zf01lp)pL&7Q!K|V(cqQ73Go7%@1W%ON~bJx+*AS)ET>}~qq;^|xa@mLk-+Yul1E1C
zBvCQVZivcxVbw!4k+1m|l6+KX35<=JM<@PjQ@Ix?*rC=?fS!q^ON3rqkx?<4Id{r2
z{-aZ@I};KfXlJ+w=NjOOVZ3Jkd8xoDlFb6H9SuGxkjcH6g{Y!;l;<BG6tXi$rCWhf
z5uGZlu~1KZR1ST)Fzj(r29dJLNME|<gtC@rB+v*}K*5i%H5J6s{{Zdh?t^(Hg*K_q
zeyS+T5PjCcqtF79MG}FxIjJSNPDViDtL2pK{{R`~eAICud~c8ve>Fd#U)XV#a1Jwy
zBHDJ9ltQOIMF$*-8+&bx*0a(&dVb+%XvP?Mr-WRXB_$PLmQYI*iO3;I=C8GQlHl4E
zZZ^>uI%JYUSPUaRIHDoa9!X)3cM3SD;VNkp+>5C;0i?pV`hy|tin^LuZfxX*NXk^U
z+E4kVbNqmy^!sd2A|A=Xq4LJ5uE5-KJ5QsnCib-A>N!)2yVA@zHvZ<(xQ=VT>5+cR
zVh&9=O@Q08A>;8@LNbF&OM;w|gyP!l1~qGKV>@z<#S^Sr<cXpj{wmr#c;!2pMnx;C
zBL!i_Sg^gVKAB&=@=bP(!X(QmJ5GF6g|oAUic}dQt=`%;eiQ;arpauMLB<I4QaEQ1
zA(nX0v|%-!As*qRjyy2$ip*WX{$r1h+V!dIgpZ7Jd#vB6{W-cOnh}N_f##}~ITph#
zcrmu(6#<);{Lw7oz(&bY)mPqIO%%Vx<$P12_nonv52~rM>JMG0cCU_VWtG^12pFN`
ziGacE)67g5V9>HrGfD0@AQMs}g!p5+WDwvIc&5qG$BLCi><3Vr+LM8r&ELr8?6%Ma
zXr+mz+*s8Xrl1l&ywWj|nbw^oV8HytQJPuAVCS4wex)DzGI-~@u+zf(BWcZ3tG9@d
zUn0gc^eTAR<YuCT3}Dd;pmAKM67<6~_WuC6Pwu@pNdExRPwKqOPX7SzQ~R$?(gF69
zKUKr^Pu~*7(QLKA^GW?ufNGgbswokU8#JKik%e^#wwoctJ`HnZw<eJ}IHi*-P26w6
zJc`&oQ%7~c@kqRy5|y+Rg<+FOPZX{VB?rL~$lfVK0N~VP0M{|846Z%+BaMd!%I1+<
zj;bqn%Ha1>4lB}N%<{!d$0+S|bGIV6wiKLGT#9hP6!CW^21N^(W06mO>y!XaDPN9F
zF58H2lZOJEj&n{Fz<DB?RcHc9o+(#2q<_Ju4yZ+Zm|sMDn6?cg5-G<4xTI`jG`zLg
zMURuosIJ$X)TjnHrIAA+2f7-ZKK2&^Mq6cIUO1#2)0-n8(!w_|0=%lk+_rf(ZsN({
zTrO%uj8`uQ6s`s<*Wt(Q;!$1<GIuKwC?c7F8x-((re`h9JFX}PF#4q(S3d)aLDzJD
zlOW6;#+A4n(@P!H=j${hROp8<T-1HGDgG&g`BOgx8tO+>o+;0fNIJ<i5?hEy0nIhn
zj%rME-7;gv9yXK7`NFX&L%D!HDk(wPoaY{ErXn`MCmt#-;c^aedatA&cyVFv@N#LS
zkhwl-ss=Nfk~SN5k=+|0nA!tkt*vf9?YqgUX$i&@@kD9zh03)^GBg+amUh|5AKfJX
z0K<cW^-){^r+U!ROxt_rn#fFKa~Ta<9RMoYFmvLvrQdFIn$h}s!$!N95NfQCfihhk
zqg}1Ox&BAmxvf>y@x92JDMs8?Mb4K6%WhTL{{Uj4y0o~rx7!mIAoo-hI&3{ICA+#R
ztabxNr@s>dum04!dqWkxU?Tg3b5hB)OTEqj6nYh;$S&?Lr9cVDs`-C)wB?&0S)yjt
zA>MYk3*AL&e+|@*kc_Y#=Af<u*qyPJBfjbYf-OKu%18%;R@Unows2Sd#WEFu!?@s_
z`KvLwr}89tWRI>i1bc9k$8^Q@HNr{>k0AC4r$_DF&LRl7$6|>80HxbX`xK~QTH_2d
zr5?#qzDQdB*5Sjedro{&R<p|!?h^6+(<b!PteK5-^hF(`dRqH<XDnCqO7x%c9l<e|
z8k37z2_z3Dyz|so(p}sXjFP@<ed`M;ZJ;o!<0G6_GS5!KmvNe{>708W5UxufK*YvS
zj~K_^E72}iA8io;_>Sux==Mu%hiAq-3fK%y6KuiH*NV>~nf~Bqx{^%9m_9R#F>PXs
z$j<T&!^J^iX)3uWS0}-y>elyPu#oN?9&t$6E089dLk!^);={Td^!EMX)?~7jW;Z-k
zw$}^$FK-|pIjKD}1Yh--isN?dlUg+MaLAe@T_<MF_tT3s{_GNn^T`xeo2R9so^t+8
zXkv~hL9p|;ni^Yj8#v8&@nXjpxh_X&3N|YF=v98K<gp;;t?px0h_zD5AtAw0gVC!c
zD;gyX>~~>@G%Sk2vS|CI$v<L+A&BHv66F^~@)zmw8ghs3y&BSFTV#z>A7;GY)BI{1
z3GCOQS^{BE++x04>eG}pZZ>@%(IX6+Wnmykf(gf(3gvg@qwG}ADjegnQMulO+x}mQ
z!mf_|RXZ7IBVx-b!wO_zd!ztPDXyhXKtGy(N(_K=NcmJ=yRq`&h~pTj+7vPj92$`r
z1UVTdnHm28FCY1;yJM$FmO!9`ka(wBK?yAKu2<@zyI+p--2DnjT1~r0pA{P0K8eeR
zWV^`9<KyC}{Y7+yg#+<dGdq~dg~95nZgj~mrpp{+ijs_zVoE!r!@V(c2Ayoz61u5w
zi8ZIv^|)Dg2^8f0*O~g0(|U#Fv4#oVSN{Nstf<rdVWF@v%+kg_&TG<O*WsGmbk7y5
z^o;SFm0EvC(l*+VI4LGS-lfzvBP?rckj4A24$ysNszMe$LI(}^ir;8Dp}Eb>ha7MX
zZH`FG$;->7$&0khKUQ6)BeqgLBbunQbqap`R5#5f<>7tVIjOy|410XnmGuom$(7OW
zy(3BNY2z2!&W!E+g)DLVa`JOgJfG<u{nU^RtMWO`V2b%$I{eT`n`)8WBFHx99GZcS
z=J@m~OL6hf)jX_Ss)Ay0KqtC;zR*rPq1pK!Dds{32?m}of~OuxM4P3?4IHCz0eGR6
zLwvU2(Gjb0;8N8SBTW{tWo+aJ{{U&MJ)*|f60tn*HK@Oz_S$T$81gY$Ejk8xT1=hA
ziLJOE>7>_3pVh-=Da!kz+7(fQj8T@X5^!;q^Hov2f<;EoD&E*D5rrgu3hATIvgh5m
zl`$3rW2%lo2;1M{gb^%lxk=4Sk^F(?o3T=%q1G}~E=@fiK!vf@M5lXX;19aykwf8F
zk>->nk^7Q9?0m6OLw9j##di>DIOQsR#647VPA(rTKeJ4obSqcEYjdehcV;6hbL9Qi
zRLLNPwwzR}E5{zt2H(1hS=bN<Iio2#a={p-!UBco49eZnu$K{#4l~_Ccc^A7-&Gpi
zvH~&5;*|`?N<Fzj#(vE>6xwmwp_oel0LY3<jnPK9pqN@Gc(*59)It()7}Fw}O}rYG
zB@!Ve@I^urwiDnN98^wY_De~Pxy3~CFq3a2ns&rvvy=Dz(cm1Mw_pJpE-FDPyBOso
zs)geNJP=o^9%Yc7{N&NP2V}Q5YKiS6e0ek^mt;oCA7;8pt_tu5KI%4wqaczA6fBiP
zEw36m)VgG^d8oCk#A$?)a1Uamer0EN0*06)uvIyx@j&xrUi9qW9ECLb9k2qD7*}pF
zzQQs`)jH;4CO~4rZaS$$U9!v5wzFUA5&N0i4>jw0JP8y<Km*$qkn~-K5K0thy4Tn-
z4>38zU<$JfrEI4%w@AH2j9J8lXLDwVSkZ5gD8>as>yO*7^k{_VP#@K4vl~OL*nZ|w
z_@V6Vk)(G#9Mtys7qTy!p4C8+*Y1N#eUUdFhewT@eyLUzExh+gi}E-bsZj7o6zZQ0
zPFF&Lqm>jb;{b)*!Kqt+fyQbZh~1)P!RD58wMckIov~t^IxCK<W;pBnptOkGZ-!Bi
zs;~++@&1!j6s0KHGQ~zzh-BPTa`)z%n4Ts^@1sPJ2_Oy{k|>cz@HsphYIZHEF?!~W
znb@Gg@B6Hk><qF#4#KtT^IJzT8;2vW>aiZT(hanR>Ltn_<%-WdO<OeM%$rEdMRG)a
z?rx}<GWcdZ)pqsHRcMP@LmX#m{MLTWAu_Y06UkbD7neIu&^xKU(79IRR1(O<5~Bd|
z#Rh_i9g2qeRR*P<&7NxRLRC&ibM;jkfIIR9BSRSNen%3dLyVfR(()n#s{K_UYv`k1
z4r<R#Cu*-`3a3=R+9V&ywt(ZhDG0|jBsuKSD%@ig%v~3G4%2wLLO$!$bdp*mKZ^4`
zC?DM*{EGD5C;tFR2jaLMsJ>AxT^6f~aQspx2+bn!YM!X1=Q%XKDXvIiP9)=+k?q+Z
zLsXSD&T~kllbTs^xMG?%@!EV7$bz|Y4LD+&3gnM8jthnxIGmnMDaJFJYcHDYf|#Sq
z;gD`WHO#mfqz4tw+yPY7Xl_SDaNy>OQVF2EerkJm$RfIapPOsh_EVNVNn0C*KaUi~
z6ycnBuDo9?&c><vVwOTNO;gFFe&{(pgsw*^$fkkM6r-v|?x)cNka5K*;8H(z0Xitf
z*WM~8O@vN2S4sS7s(x6dknzP7{tTW+N7Xu-hmL9B<kj%1LNGg^LsRV&jnyUE<pnw=
z+Ze8FG>K~*tB%WD{{Y*gf##Ku-8ap18tGpJCtwlXMmTzqV-&?7BTCh@5zW;njMBeU
zzA=imrC=e(SeobHbxNdfu9ReVRYiPTLwOKq6wrQr(}2LH4DP3WpdBYBkpBQ0S!stj
zsQY9ZIciBq6zpKrB+Uw_hspW7c0`~)Fa-?66wY#dQ8LONJ($G=uBZyI=Dw3I4?HM-
zaZB#x@k6ua_^A<<4<?s!1$SU`npmY}eCCIiMrO#QEsP!16sU!ucV#^BLrCGOHFDAL
zd!`YNb5=G%`@i{Aws-PdNHE6$&`jLZ3FkDAkkRORKBg`q0Jb=*8Lnbfg*W8q<ya3-
z%_r{>GIo>AZ(!3F@^BGI0B~xinlnx*K1*8zY^(0D&m2(q*J&{^s$iNfEiK?cC6ojn
zDN0*z_$bKpL$a4vWHozf6O_v5u|wz=P}$wEk-2^<-_%T9izkl)vQxt;mvbpzawsQE
zwlvY{T5_w}$P<+{Y3Usz8<}SlE>pm)*QKr<>_i7SCmF9!&@B?m&@ee-GHaos#kzLa
z1rGNBx1=I687$<%J&GLqI^`ty1jlu2?cbcJ!8AfJ-UA*p&2E;D9o-GzWqy;Vi(5rW
zm^)RE#bO~;3{$U6LC3N_G@DGZorfHCS0uKDal$bEvBZo^En+4cWFbct=$@yy3kw(S
zy#D9|F~>r)O0J7l&=MrcbP6#Fd&O%#J$^0Q+moH%Ydc{&v%cub0OK{J)9v7r0VhVe
zSsL8vqa)<Jib%x6YY&%Zy6)mQ(cy2BH<M9EW9-HxQgV7UJ<BuUT!PuEpCVW^wy?Z=
z5pYkMj?yz}a*mCSr@AUvDG@@uPl_5_D~F3axHQpwU7}wnrsy|GJ+y2X9y!6QmyJmb
zry$po`gd2j({4meNkDLOUX^31!+UHV837=22a4}u%^a)ZJZ0gKYAgO-pXu{d`kvUx
zRUB2`<d=5->Ze**h7>S46>PZRu^H*X56wp*F1r8%mgwzl15pE-t*b0dZMbM}6_nuT
zy7Wyi5?Cv=pDb6M$iZ1pb++`3&^4Q^t@0y|Yr=n!>9*xk`#q=l?xpQ<`Yd5US-Huk
zi9M!aj2{$%N=uE~K|E9C+aTHoO8KubO|B1b$C5XYj57+7Nlb-XU|b9V_d~DFTm${l
zSE{gtD0#(K&m(+oFCg+USrnGr_9|D{(*{G1C~L+np|Onlr$T-qfz2bIG+nZllPW1K
zmd$h|oQ{u!QOdy>6{yjivH52-oW|Q@DDFXfy|pUmcjlpq$Gig$4Rbe3-aOK%EZ+(@
ze6dt&!X1J)!;wspW05{e4t!#p@*EybN|DI1iHO3`O5LD3OOrjSX+`c-tYdHWN5yK;
zM5>L7v@EW>jtxQpW1fJkR%u0W%H9}H_b!H7L}HVB9!*VixzESCmU6BXq3gPZWMq>A
znxbiLwp$CUL_XAowy?!b5DGAPsT@4-xSUhWVF(27G!ms~eebDuMT`^lY3K@^j;Z@W
z#s&>a(SW?3DmnNCgR++AIH}-O7$jzrO_&>2kys8Xl4<fQcDM;pHjH&r1C`njH55bP
zp1(B3VeakEp6X8`MO#};i`4ES(!I6i+N-qKNVW`E00vDisd4V=D+g#eCZo9j01B0f
z_h;(58Yri8Jm0F#!ZF4F0Fq>5Ba(xvGRo=TagoTNbk<;31L996tgaS6d}FHU__K-T
zgAt3cbLO1Uvy+U4sNtUj1aVF`WmKRU27r0+KPu{|ILCjQk!_JOOLszTn5bd=PgLUW
zyavmCRNw<zDFGlh6x4{Qv0{+>t^tr@kuBtP%IAuNI%&a8xF8QWp`vErxVA-EU9S6h
z;#CBL&lM8eL;)|w7P$g^1Dmbjaloi$!wv-&Es(b##MH?gXh#DXpsY5EwP{Jl(lJxT
z9J26NBlAbhsL!?_KQv5s=+NaHD=&0ZMRF;lw3l)C@t$es*K2NE;*EFO*bV*EPbnnr
z>ZJz9MP!9{WD`znSm9jfnuTssG$FH^Xt^?fAaUY~p<RiE$z#n%+X9kCDYr9WfLaFT
z>50Lj5%SJd6#oE8G#$i%6N9)8>6eoc7S!&m9#<i<_%#98w@xHuw~C)dv7Y{61-yHk
z?t!wmmgeL}r0!#ZUZJ6B@o2heWl&~i%O9$ELfEsO)MxJWdwnL%%W;V&$MO2AD|M3L
zz4>DUCkj6mqtxyN$#W+oG%lVZYo$h9Elll3G>b4rzFD;NiXz#m9fZNF){I+P#_Ryx
zYL?n2w9>N}<(Omoqjg)SYime|B>5D|N-<Vc;^iwwgiNnA8&82nDGPs+YZ^5f7V<Sy
zjlGIDP}T}wL$K#|GfML$tdC75=^A*MR`tl|H9P=&<kbPxR9z{@2N^YaX)-k99D=#4
zAvD`Ye3+b7lLp%{QPHP?hd-K)ux>#g@lKJO++DtE&DCX9WTbJ7;<`bA4st1yN+Vs&
zaZPEJ;L5c-niWBhc)!lFuU=_fo#j+_N`$lk`JpcEeT$gE>8eSk8$pa*odqBzIUgmd
zI&Z@Y1+Kzgm`VOaH0W*wu`pAXLs4XpM0lmyB-0q=2RvsK%~IMs+c$}Z;71fTrs0uD
z@;~!L>9ZELD%`YuP*YA$EG`ulBzaD)WZHe>qc6ao4F>V4kK6<Sp6k#(M`0ZEJaVo>
z45#;Ad3Ox3PVpRtQ(1CDYc}Ja<P#Z>+XHsy{L;rVL<nFH;)IN(hj)=tva>PV4OHLJ
zVnmlLpqY9fRZV!x<dFPTjNdgOPXeg#;xXrI_d&4Hk@aeUeHjOw_f|SX;4eNZtJNPX
zsFOK4?yR)HLZClYMy~yk!}k|aNzW#WiTM-+Yw`rq(Sf&$<)Y%pO>g_OAmf_!{Uo1d
zKdSQGEPvdf@0#^3DRFNZ&p56Zsi(Oz*z{UQ=C~8)xhY0Dtm#@sOk<ywIOi0Pfn0<R
z>bt2l*n9}5oKnborsQupu49~0-0Ny3c^o>X3<jqmBpPTRW~BU45a-Voz>IJy9E<@-
z8x;GJeS(JZxaN_-;*=4b(zi8M)RP=l9CKY01%By;iKHFTS+>bIgKr#$9Gn_(Clw4%
zS%0dSAB}oFJk-|X9F=e)<0iTKqz9fWo~^rxzC~WCqagK4px0+7nmc-A8zpk}T_G5*
zAI6h!$+RB>2lB3SNI7a*2gmbO#`duJ9KHoeI5iRsj_Lk!TBE{3G0byFGf3Q0kHtKH
zaIc{=%_D*LU7R;m**fB<f(ynonnlLyaQ-yh^NKHqh%25cSFUNSX*0zg-y&B6ak%1<
ztCaIx#{)F0pq-8?FhG7PL7W<o&s5UzL?oQj4y&B;UE-qs#PBqpX|f(EK>22yA;{*1
za6|KM^^i;O6dl$XH-UphGvV^foKW&e3mlJ$ucXV9fCx$d0D9t1c{R_EbDpWM%W!j4
z^g&hjlb&iabJaPH1x8qAg@hspDx_wd?nwMl(KpMQlXgZb#P}ejl53H3$C~BNMtG~w
zPw7%zOtLa%Nub4(rD2|v(~O6Fi;j3T=-LDeYT&z!c+CrErrX=c7Z_qX6=-2V>ZN!-
zX)MJ#?qjuOCO06G2&xNfg;_nf$vz0H{YK&lwAloWi7|jH0Cga4cIRmaidEPmw<BLP
zb`i!JRLM1y^#N0Gr4TCTim{IJNp_^1{Zi_hf9@azADHJ9XqUC}So&rJ5ysz<UZZOy
zV$l<CIPScgPL@l1RF4O8o@>#x3$NZ-MsxBU8tVFdlembVCB#Q_3=%Q*Qf0T}k|~i!
z$&7QFG6AvGh{+x+qbYq3kl0X`8;>7U`DGDgf;gw$N{uhLBMHYss69b`up!w00Nqh3
z-(#dxdbd#$6CQI|r(ye*jDkTt;;eNGk9y#o=7rOwxBc6+hb#vbhdio04JOhIRJK=G
z;Y^d`^H)|dBx8~{_)+4i?NUiD3&eo2JQ}=$+`tu_=2ot8m?{RDep0qag{s|7ZG^D+
zoaZ!C239+y!&Y0>n*FqA_MhwW)j53)7`9yg?A(PdjEbG+kQGhGs>bPhpSVJo$I-0?
zo#Q(MY%uWDdIX};Yd5zlZDqsbu}6A#>RSmw`E4hh(E58@$vBL*4lqqz+*$~21A({>
zMK(N-9F&m~vr!C@v|*Q>!zQV<h{w6fz^7>Up8cp`FjtE0PBQWCbIp1r_bte=Gdp?}
zWFbj40zM86KJXDC!Oaeh^5?n~SWNCZ0<=DsRG6SIw5j{7+ma91qV$WFwYv-t1od1`
zTG7Wg3$Bi@rD~;vI%(COk$#3K)VK#Dj8F>h6#IOiG*lNt{=r*3sKq|k2ol~$G9CHg
zeb>$MLHms^kEZDJUN4_zd~wJezgel|CRlkqj)gj8kgvo51u_A*mRz3|p4xPDZ*tru
zg+u`ub4`L$xeB8P?xa;OBRLtSs|coG(Sy6M#VoII$*rvoxs4YGCz?SPNZ1=DjcB7g
zayt~UW+_W5<wsQ<E%F7IjscNAIBpnJ$sS@QJM)T^MjtW`&x+{W09AfJbT)l~&iWyQ
zF~)pREalKfbGDh1KhuhM>#iy%Ld%z8yeSlWl;j#=L=a?uBi%g|$lRPQIiPWZNW~VN
z+FSuNxNndZCmak4ArW0Sc@-P~0O}A26zhHE{j<hTbcu7yFK>$-8)3T~REw5k#{g7!
zP=uH$&2(-4gGnUri5W&p(IXBsntt^$a&t^8#@{s3xF5$AT=DPxP*W#{RWa<_{`8wv
zNCia@U=KASWb$i5lj=ooDLx5He71u@-4cq;oEjv@AR&+vJesQ3Wp*Ms&nLRBP*aeb
z)D_q)C}fqL&n?vv?8GA@aote_F~9&nDvlE_<GUv{+{2Ayf{Hv=vpdg~&4yXR(cmj&
zXPUKvV=~~7O;y>FWZDf{!3VajAsHvCxpIt;u&AN;5w_ssq5?F0H&j)wV2g8cx{$<@
zu`0RysL>N4X;2-Zz9Xt?g=XJ_nv}&DAe6-u8b(uqKNM+_!Q_#fXxMnCB(1oBFe*tf
z7v+r8+BH~~L+7d~KE-VVyUMMQr}I(H?<m5KJk*n2h4>%&sBNTgdmv>^1{6rC@e7;^
zVMzvXNggUzl{b;m`lNCK?+wq@8YPQvjK3y0u8@<GuYu<jG?U0X&Kj5`EJz(xvQad6
z-NDXkPl7Q~%??QrGb#CXK_v2@iySbcbP5Zb+jEYNX_82aLdVz@62~EJ&CNk}8aB>x
znh1j2%%(PC%aP)$-svAblj5Cp$;tl!ngQlr$J<ewAQI+gUUNcEFSvjKh7}a@mzQpF
zQrZDL;uINTd!mCzv-I~$X=gXLUl`D26IYu4ry~ZA-~RwlIL!wO$p)OZs@qTxkriKc
zERS;>2r(8j{nmukW5HU3l&zCVZzMLV&UY5(i?Fs?BVGt{c&5*vZZ*z9{{Y2Z=oXCk
zFsWbQcOUAoWPds~#Z)Z3(n+|ux3F>iSDKSm_9E660{;NVp-}1U@#~UGya0Hh?;?Ah
zR^sTBk=0!}Ex(eCaeLR$+la^gNx(SB;<_Xz<xHII>Z_7@=G9hKQX>Zh)Rx!pCbH4(
zKH9b`ESGN0kxr5=qGmUa8#_iTU!*Z-U8DUqh?em`>e8Z~Fe`WI)ZewU#@f+=H#UrV
zs#wzt-L`&fo<LX-d7-ttnCzue=enOx**Zreat&yEw{0+}t((DhSy<T~!6uW_@_Ux;
z9GsP@eM1<I5&1M0fhvieGlNw-j#Qa?N?033`Vp|G@l^WxB^8u%r#Yka`-Gb2NXZ!#
zFQ^JfY~1x%<!)G;MHp7xsWQ6h*`5jWgN|x@88%<K=Z6(k_elC~^*Jhfq>AZ;*7phy
z(r{}UojW(YcN#3UJA#XYk+rH@8{LyB1K}X#)AiUz#Axa`WvH}^Uv}A}ZsY2&O~!PL
z{{R<?*hJwj<K6zWIjpy;G*57Q@q!8D)!Y{b9Y~@6EyY+}2<~8XARKTi*rhwNnch01
z$ft%|W@RG=qgkbiOXG1hvDLKb%DOng{{Ts>ZOXJ0ymB@ab@{4i4XnNy6qh7Y$P6Y=
z;^)OwUKSvhJW<yYVg_aTeN`ma4C+z1HC+}Ne^4L)07$4G$L_5(=gO`R08m<zB=;+m
z$TVe&qVPOb8n60HG5y7=F*u{4eg<l?5_}%0*cgy&lARY8+G7EC86S1(T2b8+AH{i2
zl6PH3evNvrk^caqpVe@^QMV|UN21tL5=C}+q+Hf%N~sl`MRC9t&6?nU6r5QGjAoUO
zRMXxm#Amu#@z7b>7~+wCG^39cz<H-{m1Gsga36Kfc@@NxC{o-{0&$u~G??a=LBPcZ
zoi-5$X+OG9c*ZHb=BjRz2`o*t{wSb0ppJ1w+qcJ)n(X>+WqTQ)G<=R;8oBze2j#^p
zWQyw76p9XC<IOM=yP9yx6wmci$-W4MezRP?S2tC|7^1jj7zfWeu9K=y^-{>(U<ydI
zqQm4=>ZGWnIqsy-%C%!^kdi}*uJOeo{MS|8F$VLRY`Mo2?DA<C@kth_BFCM)_f7u*
zR}|sErunHpXedXd7{{7)gkqC|DkaGVDfv^I_|jwLT*HbnPKf&ye;iaYHr$$+4mqH%
zpD%&Xsx`UR3|of99nzukHsERF@k7MeZ^aNgsy#n5%a5MN%N?v{epHfVj;Yz^l0$*b
zbW-?~`9C!-7=_?whHMevH7&yt#wd3m>L2f4MYDi4)p6Yp=LU%&Fv#&xPJpgzxvwEa
zMCZCeir_f>(`vAx%6ZADP>j?cnwBCBG$qLkZ3f8SUaArC73umml^jtjpa6=?dKS!K
zWC@ad*Q43mEK%<*`R1gVi!7UTGTIf4@-oBjq?|tBagWKU?{2+_{{Y0GllN5Gj;nak
z5X-<GDGGlBb#{nDA>Eh$>oulm&wX@UwlZ@=HT~7a!$jE&pEa=brMWD)t-5V8o4FRK
z?L}SO5^3@Xq)-0<RjR{E4RQR`lULrV)4k0-W!ubs&^ic6<4_kSs?U$xp3r1gtm~_I
z)-kjU`K#YW&50Tyc&g1w04E-3&qg3yrd;z+qIH!S>Y2QVjuaXyD9FiSP+6)0wZE6$
z5hTZEz&IXjuP?!+0cuh-ak0l0fb|XL=^1lcy=L!XFfIPo=eo?k<ivx5O;I=Cbi9Gc
z`&FEp7fAwp&=4HC#%r}h?b)7rG)JVMX1-Ft#<|($ZiSq<HL%$=x&XWoJE-QmWWz8m
zk2PbePaIaeP~?$=S?H4HSxN#5pSsMsbWqUkZ&XW5V{-48VZzs$U(WY;Z!-gyIjx7O
zY^7p-!f-kjm`g%t>x|aeVQ@6#Ng&M**`MOT^X9H6j^j^}9cB2QfueLPRZY3)G`^)%
z`<09@#}#gTaaga&CCO`{-FvdHxfv`@aw}~;%Po*4PBB<dNh~0c?EdeJ^;-!YjhPO>
z{Q+FoycEZ<?F8Zyz+e+r7h~Lnob}?Ou|Sqls2`dxD1qn4oY$ns9k57rWiMl8keUFG
zmQNhkqgJ*+ej_=m3y6q0#}r(wbU>gS`KMzaAaRP4G41jIIs2qBw|*%kw<=@B)3jdt
zS624rF-+eI)~vBQ29K3JSDCJ2kR0G<wps?F3<5S89u0ik*8M%{a<>)3K9A@=p&d>-
zf4LV0iEX^%oU5_6_nLSKxKhk^xY#m2>(A95jyW%cLdxjG4vjcQxentvu6)Sg2E`Nv
zQJ_HE$?l>~L&|iBqj4i27#vdpa`^F0JV-}XG0LcH6H7MeD~3e!EN{^jAd!AAjAO+`
zC^9f&Ij4XE^Mh5VB^zapM}UTrW^`T$-F4r<?M4lcB8LhwkZYK#j12DcOr^RuF2tHh
z=ib;Muqr@Fiv@{3Dk-D}Ffoi%;N9%x4CbU;V@XmY3{;Risp7^11M;TL3oK#JXT>Z?
zpSEXwhX;x(528v*s|dP%;Q<)@&{Ab&-QZM=<>3OlJTxagiVO0=2&gUz7Ef?xMo*eX
zV~$NFs})iJ7^$8;=XNnseT>>pg@t$E3Y0~e3J$*Mu%_NYt}}aNl5tU_*CjKPS3*yy
z$7KsdaC2GFDoZQKImzO(pQ?VQT_pmnpvxh^0M)0X=NFnJQkKk&yNz$tX{P@G-Irbt
z7piL^1?@rd`!Aw#f?Q`5i@1WyhZWZFv+<nbq(H&ICxCjb{trLopG+}{H!_h{l1^n{
zu_hI0IAYKF5DZc9sP@ZnF-MxD4ld4rbg0H~DN=c3m19xgnvl44Di4no!MG;k27c;S
zBqd?DC7c}8J6vRj$fmQ9MtRLk9tb4xDt1nXDNUa?=9k<hN~!q%Xd90A0&(J|fr};|
zii!9mMDdsz3cCEh>5v>qMnD72GDP_2IH-a#<2-%Q0VIVz{oh@hl1boy<JhK%)duDu
z{wQ){RxRhnLJM(lFb3LWQ-v92&TF05hj1}m&l3W00r5l0JPn#)Jm4CH>}fIwH5tOW
zGM~t<Gc1voObxVA4!OIL2?L6T1~ao68==Kl>K8!d1Cl64x^HX+Vj7Bk@(sKMUJM5;
zdht}!v!t9J>PfE1mtzvy?u8!YvXFqW@ll2<g>~HHY55x2`cFhF2kr_JBXyq?o|g2i
z(;{A8AyMEFTZz^^I>c>I7>p>Z@xu9KbDxA%+$2#a?ObG@T>VuQ!yn!M0E}f*f&Ece
z7mT63%$IZWKSF_NJ{eK+&;BVgNq973#@A)elCfM)is#^Yt&W6YHuKJd9BdVc(j|{$
zfx*ZKYfo^mXQsm(Ez2C5&l&u>8zeZZFD>mEEx&LF@*J8v*GW;QHNpNzx)uw+Z$eQi
z$H;eAqI4QMZ6Jmfp+XGDInF&9nDm3~mI;BJ^*~x!DVAUqZr#DfZ@si#E0yGm2G>fB
zZJK{4lT}XymqnQMYxx+sbb%%9!;p7<*5lJCV^D><S|dxag{@?AqbJQ($2(i<G9-N{
zr-nC)?tVqdm)~0#fKONnCe))vJYtRXq>@MeCp<4yCZv7)$fO;WqO^$QSs026Z{4w+
zSFyE+`l%h@Odri>?G{Kb!6c2ty4GGGn4-Vc4XRnjmde{kK;YH6BPmOAD8rRP8y}LB
zL6xG`9J~JjBUd`yAG%$j_f(ef6}?9vb#)Zq_h^LkRN~0??0;q1<5N?KQDdYqc_DN-
zZmQ1DNKtOkK0Z8(w~zrk5CGzkowo!Z^;OW)!abR%tH~~k$yrh)jgRp-qa%$VksU|H
z$2CvtJ@JFHuya;6GaHQ}H~#=q_eK@H$LN!b_SC-0t$HzUa%55Y6gHK91=iPmh8<P6
zsN>&evJQBxdMuFKsVYg&6qA$3J4N`C%Z{0#)op~Zk_K^JdFpEtH%b92I~D0^x=VFp
zagoTZ&b4c{Qdn+A>{ZgKBQ#rtk1~<nBQ6ibc&cfLmPs9s2<TRi)wWM%Dfa&W^kn2?
z#brxGKbv5V>o(S%8kDVBO?4<2stLz6Z*0RoP&%_(86x()9!a9@U0bjAs<kn!OfYvK
z=!ge{-5m)4RdVC+nmP;+Fl&t029n!LV7CYdk}K7;Z~X|*;=G$n=ISx|uUXN*^dldN
z^Zus(aW0QVs?v_>zG()2>nxu_Q0K;LiN_SyJl7T<R5vGNqp{5^@k|Ds$22%dG&wM7
zhlV)jl|iPA=B#o`D}ig1h$os=Jab6d#}&YmxWyupyFsBkHO_DaA+BNZ%_?r43~=U?
zqg*LDs^1|OJaQ@VHq-c_uNnQ5v$`KnlHigi9aXT&vPsw+Z5Jb&a~up*P%&Xodh~d5
zZ^yY9q^Z*taf)xAsl%#h;PY2o;8UF7QVe5?SL%^!cKIP6n&L?DP9x<=;{vO$EMu4M
zrC+*<DpAE>i=-rlZmWUg2Dy!PG0kYcL>Z5jb9GMbtDk{NqNtWP98xYbP6ssK^;0yh
zG((+oDF8U7cEGL-2&qcKGjBD|z^8T1HbccqsM$P>l1->+2FWCoL`>j}Q0zIcJ?fda
z%W8ZhUI@~$I>DnOAjS@8CdLL0O>F#aG(9snx0hq`9MRnyB?J@QHbI{2n;Wnxu4~aZ
z#Td`cHgPBMs9a#+*IOT#1B!jiqK-v<9X>4JP(I=T0-D6I!KS>b2gkaJWIM8Iw>8ly
z=NnB%d+w!T7#fTNo&{AQgz-;|^IWk=f6{5|44LYJbje4c`X5q>AXMA~4;8X)LPrV|
zWE$~{87H=oG-Qg{XgcJQ?S|$+xbsfhW=y!BXxrXG@+!EKjCDfX#1b8>L1R%xbqIu?
zd^guLBe`ieth^5trdEes>Gs~hmujolgVJdjKvx8c$Z9e<M%^bLR3?q;nRIyyI4+s3
z+C0&Os&9hGTSu;Iku;httT{O9vASgM9Kr%A$Jh2ZdQ^~<D<cdFgFwuXHNoYW`TC~U
zILb-I*(LjVE*KdcmN`LB^xVGQukbu(p}l`*%Lsl=P4uEM)X>P-1lDactf;5S={B2+
zZ)#hKibiQ;3mWHl6<4fX6^V+l7_QWsG$TFgJEQI#Hx-nb#Lj4)Ug@Qjjojvhjc|q7
zbTe6|>y|rofHg?OlSK~x`>k)KwB@%z*~tSnp7iCJ)OALhLzvdJ5L_+2%(0Gu;<-|l
zyD5~ci&eN$F+wp%`hL*M8(c8s{WU>y{{7|w&Q2?Fq+1v)BSLv1w&*bLmgnF^?U}z(
zS#F7$hxJ(#ZmBLxX1ym_w-SQCipc6#;xo4u(8~^OqWH2sGYHGYLF)j^EKaSA)Rs#l
zNH+DFkJSU+xQ5&upQ_ENuV^8Dok^t`00IYpb#H5Y?RK`3p#&cuYMax@!I=cx0(K2;
zJuj!g?AJ2k&x2Sr_?}FOB9va~;f!hk;0m#V9n1mvgHXc{w0EyxRcU6SmnC_xK;?Z7
zl@*$;kwj+i1B%Pt!oxd-Z!RKfo7)|JYdNV}nB9kUUpL6+Xs5>*=9(3bRJJJVm<Tx(
z9IA7H-7;1}2LP!aMRkU~1%&Hsb0Ai4<4|fBu~^zkEX)7{&3O-~HJLB9Gdj5ePsawi
z9;*y_G5d2y{SIm1$fpO<^)7{|MSB{dVZ8KnR?{elgYx3Oc>O>1l#$Ai&mj^I;z_S{
z&^2iFI}llix&?gmQ=*m`HAm3&zN3#k6x06EIYQYu#WpelIH$>wPc+z3OOu>eGP$F>
z8K)Z>+l%B1Buv5`n9ga@pb@l!MHu4-p+}6VB=be55-BNh^eGZxgT_T9QJsY6C#+Le
zMv_kd0IEf~**HKj!1ijpTfYU%b#d6_QlzVl8s&VF4}Wzf)S`DMZ_N1=F!;6!=8LZ#
z5jNL7l58aN$fsM#%J>|Q6zIq<JdZTjc+y-BC@8ds+@-}5Pa`qTd8m?E8!Ft2e7GcF
z)N0>7nZ|t7`7%dpT?9nRNAblmTMWuq0M|zG?+VpAB1<D=;{%V)7{&BgDX5HxVgt_x
zr$%DD4h1$ifl^4x@l%Bs(HEbA_4h?Z-4x`{70J-L{{XMzD+~IM>bq?n9D9H;91QnK
z>;C{*TIldJg@U#|DW5g-x2wHHcduW)#O`+Ifn8k}r#(eEbdDFPdbD)EvzdSRC#f~t
zt#elW#O_Gl@fqs9=k)XAq+5b`C<!(5ZN5ya{fhcO^!qKM+kyW8uvcqajYAqp@|tY2
z%aT%)<(m|hQ;3_BQb3!dV(fQ#p&|@3wGMvh+fB;dfKT0XF3q(;R;89Lx#EpgV;=Q9
zd7$mo#UnFv4Hs&g^N9k3_fZ6v*rFq=FUXD!a3m%-XYQM^x%mN){L@wmLk2Danjjw&
z#lHu|D3M*l0mf;PD#l185NV855W~6_8Ya3>PQ@J51j~Q`<abl7i6ZVNk>aCS0?mQ`
zuQXT?rs0fX3U%g?vuwaz3Tp<9j^mutOCu=&3YJPDl2=@=4KW}j?swveoqxoxIcD)e
zysimc8XwqK;C;vngFJylD;XTJk^rfpW|_9Qz#eKVh~6?zM%p}p>zPo;^Byr(l1kB^
ziNe(LIgTP2XLnU!aO`3VSYT27fQoh*zyl=E^1MTWIi^b>&RFx}qDY?xR|KK+MG=v<
z-*B+`3fz92dVO!C7n3Kz9tJBH0xL|T95KMJbM$SrT5g3Ll7PHt6?PaYJ7rm>(xexu
zm@MtXFbb$R6`7vz_bT^uaT;)<jlG?<D~EUD6+M0`p8o*axQbPYw#6TRn!6;p;9M~0
zlA85Z)H<-tjnCjv)<m?7agYzTC^=>-=4_L^?Lk)BJbvxuYqtukx&HvV=Vg3aI<Qs1
zbasJmZEfSoDa+OLx`cDx#~KgfHq?4tYF6U)m$8Mbze{>{E2xdkyf)(1UJGrcit|b-
zT!8hvf<JF&hdIF_udJ^X(i>$M3yQ~UYjOVo<pT(^RwIhh`excm?;>|nG2LlR?`C|F
zOC7Ume&H;lLFS(+*`{Fm6egg++(#ePPpP&okpZnLM>lUl8h9(FvCal79&Zo*LgxdV
z5n9`+1R5zT-rDncubidCgkXj=ndR|H%{n-4lV+<(wvz5hn2rFeTX+l?4xfEcevO#R
zZyTOT6>D<gZr49`BLzxAWyvk>ArmeF6UoJPprT7<Q|6<R;aS3tYp&Rs6#Z51QKv>_
z7MijJu}I-kd2hO-x|yNX<LAw2-N>~B^aWtGOUG?#anNeVsZoSgeVeqKvPxEO<ZyOB
z^-a2LOB+zo$R)5w&Q2-Q?rUXn;<bC0xG}g><gWEL(Hk=y0z0U^A9N?OmK@|SR8OcO
z_iPmQaa6vKb8mkvEHS#X&TcuQ`8DH$o}OQWO2{U#bZ_*U3R_qLMIb<N!J}hfvr7r-
zg+UUTA(9^|at&!}FS8<TbT(L(S(iL8_|+z@9#rwiLawD{Yh`kB(f3sMbH_1cBO69X
z=7&QoXPkQa38b=Sg-dNb{wvEaOM=DN*S}uJ3Em_?V{wE1*Nl4Bz}V_;sxccJsj8<J
z*?uW_GIGzh{Sms&Us^Jj;PF|DnE}*Pk&+E)EgWERiqEQF?K0SY;?_3ijDV0vC<qDT
z%~;#1I3l=`Pr;=Z8diUCAO8TXE7r7-78u9iSCnZ#{mfbW73$h&{+xf%uRrRK<r3)h
zTCFZAw4$h!N|hCjam{cU70=ywRZ6YQV}j<nxZqNz3GTX3Mk>v(ZJ=Kwp0iQ;r)P?1
zny)Lcp~UlC4tcI%NTg0Uqi!yrL=bXKb9GD6AlH0x&vdlHFjo?3!xYnu(zeuT1YhMz
zfDSQ=Qh3LjM(U4_I{^3_YvVLS9Eu5B2RNdU_bqjOKR3bSWf`N*j$TD@@4De!zskC?
zt_m2kbx02t(l->G5l{TM2Z1wO`P)q#H*~JtQeVY}xj8uIr3W<}Sj9YJb!f)EC`ooJ
ze{`d+=^Xu11A$hh$i5B-<w*TfXXI;`@ro9dXo9XWNspFAa-0fGoz*-boZWL=paDqv
z#!VnU9;t8SK^T&8O_1$Ono<0!E2jAE2CB_B3{FGET&NhFzv7ZzyN1(Uf<|*qVP0c(
zr)#rq4P(tUScc6jgNkQDsohevFj_EfbImNc12ilTwoD$VrvR>VUZX{*S@HAQFy(`b
GKmXZ^2xo2p

diff --git a/modules/gpu/app/nv_perf_test/im2_1280x800.jpg b/modules/gpu/app/nv_perf_test/im2_1280x800.jpg
deleted file mode 100644
index ae49640a957d5a1bc26e0b0add1ee65a3c2a7bbb..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 143366
zcmb4qXHZk$7j5Vr1PL{Cl#Y}{y7b;l=)HFcy{J?bLJgrvq}LFd^s0i=5s}{cA%rRf
zL5d*Y<Nx8!ysz(^nLBr8&s}@Zz31Fnd+&ArE&kg8JkZh5)&St);Q=)7C*a=-Kovkl
zKuAbPKy-g0A|fItp(G`_A2bvcWR$ctbab>dw6qTyK#ULRS%9>(Ok7MXY-}7H91j_}
zdAZqnLF^pt{{z9hf0TrngqoC;nw_4Op8fxO{ObeIk`izd0SNH80r<3d1hjbn1^{dT
z03OkOw0MC3I|%Xc3GRagkdob-l^+1`@dya<{x=B-2=MW!?yUeqS|U0wab@C%+{R8M
z^b#;0l{f^Dw^Y)kiPV{a@sVm!KiD)LqUMtI;~s()4}eeb|2hER;R6T=?_Jy*C20Zp
z`2Rz{_d;}kOK=|!J}m*^|KN%4;U%2tiB(`g9!ZlpRYcP-hSL6j%K%D(dq`RWT7V*;
zDT}V02h-T4+LJJ7S=W<Y3q!rudY)c8v9vD4Y!!pi1pLs(rRFNK^UFM_897n;%w3xH
z8Cg8RjPHp3K1Sj6c1m2khoT5AR>g$z8NkHLR@DN;m3(A#+6z4|8L8>{_lovJwX|Qn
zpsEl7<A6#H;@1fWOKrOCZS9#!jOp^T?C;NCeX9+dQsuIgj3si-kA2sDxYugq9W1k4
zXmQc^+7~&wX1V7^`JX77<Vt2UZ5luMQ}<ziP_UIB%(GdDX($GwAL+q;(`3zR0)6)n
z;8oLbRO<7*E*81jkPXF?q?eWp3^whTT=|Mr+O{)L2D(Ih9AzF9USaa%=r}NCu)l1Q
zdk#~@vxqRnzI8!ojt21UrCkka78j?ZGS>McZCg+7{I+}Cm~25j)eGipq3b;AYQiY<
zYW|0f<d&DVcJ}Xl)*gmYE_{)bcLeTs2chM!=m)vJO9as%bWgv@!*^S(<Rj!co(Wim
zNrB*SdEA;B|JW|%0r3{c@PsrMEGtl5jXC>H5ElO~4r|tow>~KU>P6KM4OqWZ4iG#c
z^HDoT9_}4)p&*7<Zf;;fnRT3O%#41+!&O;+ZaDm{#7mH|J)y`Bv-c}624ljDJPCc`
z2VCOpeO>S?oQjAd_(731$~@5B^q*g$G$RXd519|T77i>L>RZu8-{y@LD8)0Q%1OJx
z<%_HhR(uCBjef1V0wVPwg>gzs_S)?XGjA#D!m~Un(Mg^r`J!6GItN{}A#aBSW0=fn
z-Rgx078}{H^G>6w_C>+X@|$Oj8lf8=qk~G^UpNNsb@9wZQlY^r)~e}b^`c;RZgp3?
z)h%4N$390R4s0{f+>_nb0t$+12<A<F|CcbtoA;5WpEP&Pc(QAn#Mej=TRDzR;*LsX
zUM`83I#n{$vx{eh8EY}2SpE-yHrbc*s`jSK?FfFwd+wmMpZ9*<y5fDO=c4NapM-Z5
z!-5_T3{Zr%{KXf-qE}?xe2zhaWA+gwJO_#SV$Zm1+(sIaVTW7(0&#CY$?sI<Saxvt
zqDwfs>)T~`Y2!w5DR4L2R5vs@lemAUN=Y&^vRcnP!LO44N3zT_!65-?)9|I)j^R~J
zY1bEgFJvi8^S8ie(eKiKON`f-VVt{ubXkme*&FQp9?3e0^lNa8V<RQ?-AU!M4FE)9
z-M7J}+egJE=Ium819j9j{f7)~LA9@asr8&1lr+fAx1X$b;OX)k38>hXF`5Mk*mLp8
z!e&KW<S05+?rpu3S0#*ve{{5pVaU#I*o#14l2DzQcVV))KBi`Ry%%MkBcSz$dMvsu
z2Ko<R?EWF=>;vS4yg-s*4YsDkBYr8SD-}95y;txtcyAOvd-P|xIm?S(TUU;O7mL*N
zqBA5Ml^2C(V`+^=8aNQ@R#ZqLwug4=8U5c@ag&vwRQ~}?22M09yqlf+KDV&o5L#LM
zuAB)Y+#f00x^ut%(j~}olOeR2b|`bGdEhNpl~bcbxZz}3^fRY+`Y^hD>Jzcws$@&`
z^Vx!vxIsu*Hj#lcmL&`)KIgbMD{vL)Ek&eB0Nxb0ALRj(Ldb?8JIq+qrnm&HauO6C
zk5D>HpFsZHbVPbWWBh!Mm|mlg7Ha*fx+C8x#<{+4k~?QstRF})g#rLqzo1+zE+T+V
zHP-b10NkNisBMMRpn(@l=}a^8Mf_9t=crsUtH3^%L{1W<i5$@3jdY3l&TILX-qJq^
ziv%rcI)iRl5DNt9cj(wOjs9Vo8Y9;`pRrO+9${~}{mrJh2EM?ga$cl5UdzIz(b7uq
zB{d!7f1iP)E~X~vK=NZw<%l_vv^%?J@!YY)WUbb9Sh0X^s5Ea3rc2IR{w*So_X;vQ
zy=VK;RO|pE_flL|b8q*6Xy;8-=IYo40SJV;?wRRsYj1R7o9m5Bpv<I!{z`>u0|-GZ
z2mb)&9B5N7M#R$Eh@LQPw0h1<z(3iZ^6~_6ZjxHCQ=K+^;$1i0xbdl7_gy%3x@3Pj
zYmst;+dDZvl>E7IJYIi|6Eao2QJYCokaI7iCC=V;A~HgHP*EC9hfjK9?@K$0UzrIq
z<27kh?s*WaLFLvqsk8svzR*A}gpr^n{U<aPt|HyPQ<;@&IPjn+P?OM<%`8jZ-beQ3
z>Llsz2u5;>FX7y5!nlmCRm~EhUHZt~eJfVbCs&x%0Z17s6t@Ol5mxF>K=^S9-60(y
zQC)HK{;>)pKKm;<B3`1nz^adS=T-y`A0|~);Xkh}cD}v*^h^CIYur-Rnxs~&lyuqQ
zZI=xe29N&{k;y;W@ON)3VYz+3j{vjeX~@!DfQK4h5)dU8o2!`F%CS=V7Hrw}cJ>6k
zRI4imN-o#r>-Hxe<Yifjg3&ZrCeRUifH%yz5J6hN)|m!6R&~p42JVkO2XvKf>_7F6
zti)$s;Ks~)921sSA6i5=iS^CF;yOR79P(=wo=x-0wG6_V{j~N3RbyOmtGG+@42;x8
zE#oK$SgywF)6k-|pm<_GL~rFekt{ZS#ms8Q2RgJ3nq%l9I^RC1P{I(5))KzAJSXl<
z;Rz3gQ7Tnw?WF(N%o%W9-e$gNjhL4HZ@cJVeO)2<4+#in-y4Tgca$J9^p95pfU`!h
zUA!O1tVE2f$HiCmtw=9EKkUvrU6!u2P?-H|;l3FKWY;Yj4^~3-ja{jqOh}KC!dS&=
z^x!A?Yrv?wZ{;<%{{VCIOr`!;;Hg33^i0Q3hdxD%T7B?z8xO@TYj&9`T}9YCVPcrl
zKFin34nvN1vzj4&qI5rp?{rw1Ly`LaEHlb~fRM@MmDO0Y!!m!K)yj%Y76ysZSaXi=
zV5!mC_hrG9VJAmaTixlSqLh`{nK|56a@sga><c4Q<>5R6n1DPJeAJUj_@6RM*#viN
zYLFm*xx3OM&}`I;t*0!;@zJ%)M-wAp+XA352(UiS{YrA}*$`F9z<)>joqx1DI{{{r
z>&>#Qey7*y4lOyT@#9)&b~>XVfo?NmY2p*4Is5g85-(}qU{!0|p>WO<V{DEe-2*Q%
zKkah3!Z7Q{T8ZoN_OkbC)Q58o<&=#fc5id8D--lOsQH#DUd*GVH55ga=@*oP52(Cc
z`SK1bTRL?|2%8#$<iH98#(G6F&p*-sH1qMi;QS1)-D*kiJ9?(o!0GnxWuVXJlV$Zc
z{=qx1KYUmULN8qz|E-&TFxdCpxY~~Dd63M}2FKHWMHB;6`?M(&c5Y<bnkQ&3eDoJx
zM`t4tJRToe&eYm4+P)-jekvfhKrTy<#9Yo<jSB~<_HcrN1hc$mR|!GCH6()YZ4t}X
zB)ym9SGc-!3bzQQA#Y<Cz70^F=tRG@%Zu|LK+tceyr9Q5PXVwz=Aw~E3DqbepE&t#
zO%IpKe=j80IM%dEq<Z?>TI}~Ta@Q3uXvo$3K?)e!KW}Ae0ri+n5yrgw=f>;_rJ&YU
zF{A-pJYv%%vaLS8Ta1B6kJm3=+;N2bsYlp$f>n;f7XJ(~Y!<8XMlqG6*ltnm#~Uhf
zYKCtuXsHD;I|oo?@DqaWx0~2Myr>?LI%g-&;6C(ugjyEykXDuD*ULDSr+^d9)mO_k
z1qhv$#H&XFqU1F$14`-qmh<M$fRQptZDo5ZKL?Ngbd?a97qR4}^~2n?L2o374^8vk
z&UdmJ)EypwVqE9UGP5YpSN*t4<NCDbr>;8=-3>D_++)yw$VB|A2GdhTjJKe*hGas<
z;}5cHLkUZx*6Qt%3wJ+Gv?qf8u!}<USzW(8e`ScnFO}{unac-USG!&)ZhnZ{qS2?N
zTB8tr{+|Dj#6YgRiMD^qcH{*9WAkkOC|=VXZQpYZd1<!qr*6{pS|k{6qy6~+Sl}WQ
zIPf9(pr_FPs61WeJwdGGT9}`Ufxf|*ZscpTj6V%*2gmxWd&>#<m8E=~^S6ZJU8_A3
z2ip*uWL1{pVBQUrk(J_%1H_1X-)1q$$|QKC;aY|gk&sOZ=wqsBJxo^KO3>5z2Vfxi
zJLBk=F$_LAG&T0EmT~H*FaEY6Dh*_-tFRGjnxG~1c(#wHR1e_o(faV796KLw+}1B#
zyXLvgmHi$JZeJNIIML=`91>LH$0BrHUMQrU+S<sZ&0-&T@7lIPkw45B#eD-NT*j$M
z^sY0`OwS*Scz;uPdS}$Z4<Ic1+2GGH0^0e=btaPD_Oz01D7%?NgAn0h9z|fE8_j>%
zCfUAiL0AkKnB6oAOHU{3;t6Cr+%t{)?9Vo9j7<Gg6D}$w5b-PC^YjB8YLY<wf#%N@
z8~cGf>yX9m!Gpvn<AK$FB&d@ynEe;(s2elRb*4LY(XFY2!qkm(t-gmR^4~e9i(rEh
zponULk->-t!3RNN+`WfDZ9YIGdW7@Dw(<PU!l~!`(W)gJBej^#L8>!$pKjXxbBt+>
z5$6nN*3gjBC81+xR09!a-7q?2L5lk8VNkGAdu4EV;OVN27ZWziGpS(Y3wO`8zTiOP
z=(owjiRry8BI=5q4O6Vv8qF{!Ye^ePF;xRKdF;3)v)NJMF5q9Evn9-kkAAY@SsauA
zUy&F5!X{$uGjEdg6_Ie1zR$;wpPz#`U7c~})FV5nMA4;^?SdC}(>e!@%^oBdadC=a
z#eUm`p9P!)4{<y*5CKVAP5~ENKaEw>X$@@xF2O!5t<`VGbjul$!JMtIjIN_9IBw1T
zRe}&@ywH%SgiqpK%3ZkxI7*}X2^)`i!x4RoybrgS8xJRZV|(CaEPrtrBE8I&^?-<i
z*HUH+=(Tu7&aTz30E0r@l4Jcy_<(!;BuLX<*JpsfDp{i66Hin#r2DH<sSSZFU3`8r
zvz>#GMSj`kz(9H|v_K?>GEm)kdBB<3`q_He$RvI~>36>~3c>gfo3XS<at<;;=ZsVi
zApJ?!N>lNKvMDi~Za95!v2?KuJRXz0^Da7?t>TINSB85Y^_}cz54xVqtIF}7WT7@b
z)hVKb{?l82S+MN{qN53MI0$cao%Qe$VidqHpp#z^oh!Rk`5LXT;GkLB_R*b9qw1r$
zfa>U@e)sW*Ym6X0OF9V}c!)RRy&GT$o}AHLY07udqXQ1Mam-pwCXmLLASs5}vDAeV
zK2_|VG02d|Jf)V00*_iW<=kM%IiZX!u4W*!;)$BqWd{SJ@!E=(@oYRPSRRc@=$*kc
zDb)o*gVHV}LPyU%_C6h`v1}(=zql#&M=0Izi`p7Wyou+)@&k(~^_%dptmbCWFQ0?s
zspDDJ028ldzrX|0Oc&%@;JLL&+VbAM<JXFsLJ<Q)@Ee(7xH>N-LWc>NV423#j&(!@
zs1g^ZdvO$Bphpr(ioD1RTtn>%Qc+PZOb}Al1+zD}O1B{`^vEDF{Zn-dJzCT^?_3ee
zmG94K=Dd{9yRY0-s>@ZIx+SP9e#um1gayty#W-F}AKM<44$Tt~Y-_70FVkAYrB4{%
zSOlP4s5)0}Fmfy7KCBi+2TJgJs;l?PB>-1{SmL#tDab#{yFh+xe15px_GNKN$9%~^
zf+j@qz@xEZK7jr6kV)<sEHk=}kYQ5cJ6^|&1qao#PX%9D3iY2mYlb(gQ+0@#l<ll(
ztl~|TW8*0TZVi~usHg;+<cwd_sPuS4`oH?(M_~ClU5)l89^zEvX+nLuUF9q<*9b!W
zN4fso7^3K2A!OzF_NosA-bWlh*%nTPjUIeVZSIEetch|3j%@J&f4TK$1jxR9n@~gw
zIV0JtZmdwt(4VasE22l*D!3ztx7pKki=S<`75;LJgQNIHmOU+bt(>iR97i;ifr9@4
zq2C~c+D!8BM2~WlFPt0<l?Z}RH8v`90BA;x38odXF|!2_B~oMIbh7VLT+^Al0x}lu
zf$4{2<H-@3K2nWd6AbQ&ZidcnOcUy0r|;#I2|9BZ^!snYsnP1A%-85mGjsWkx%T?1
z54}5T%&O2TpvN()%9e%ukky!2PT;M}AJmn@T$k+mQh)@@b&FYs|GQ_nmn6j(joG&b
zmFGX3kQspsrqiFEIxXCh(yeKBeCL1?P2P424-0#yh}DSEoDNc<ovl{n$&VvsI4@Fn
zHYBz_)_RoLsej-JWz9g1cr|6LrW*nyDQE+qQu=W>dp8MZdZEC-mv|@6zwz+Xr@DaM
zk(k3y%M&CD6U(|1G2#nyM0HM`1&PwW;lq6lz$7|(zoZGR&Z(HHSC`UB`;0K5?@aq#
zLu5!o|F{%%(Ik+B^9?ne@vnxpA*pO-)Z0Aw)N_TK)yX*Bn)*r(i~g>O1T&Kr3C?qI
zW~_#e;^$5u+BVUN0%L&rI>=T_<S$*H*33nw>Vp<_U`=3uAiuu*DTS`xRL$hMN~$-5
zPJI?ru(EPnK$!-znL=MruhKZhr911}f$>ZHe*hYY$ZG_!K>xjo*GWu%h6i}k`9t}b
z_Nif`t#6Vv_MTz>=zQbF1Xe#PK?x&A(5*c%)(7f>sbP2hIFVK*U|A<G_1<E=$ar)W
zMD+P)MNg_EwYa_q!><d;46u}V47Xh7n;Y+{vi6xQ1_|z0e7$B-FDqGBbAp%>Hq(nu
z+S5Ku0w(6Y`5=8W2~hPHHyRONNwMOkeyx#yS~1UiUOY5!=3jM93o=Bph)Wj-Q)XZy
zzUjyrIsY8xm9RUr*wQav(Rjk`0d`%O`W!2J>Qx5p?so=USFApH=lGM-K%oy%tuqd?
zeLzJE79T1RdcuJs_oQrK3K0xnde~0xmm-t1mhl~}P<H9)7%TWg4fbTjXlq5{FGgKr
zWSs6)JD38}Nr9@1&F-e%fPY_^2oR12zV`mO&8^!e4di*!<nn^5J2~tjy#98$EmO8u
zU-`C2!wrkDqPP+*@JrTy+}KZbkeF(n(_&iHD=B(0<eAK4ZUT&hsF!^aHvf~IwTKo8
zfG~v;$CZ8cjqom-=rAnNempMs&Mf)Eu+i{uJs(R^Ga2b&CF5iNY4xuE2FbstRl4|l
znH4XJ7)fThk)~%YtqR;%Gm50)*zwIbQZUF!vf<;x;3?qQYZ>Q5rj3YF$Jk*Z8KRbX
z@54o23`l_@n&W7pcPibGlxoUQ;#)@wH!_2};kM&x`7u5H)6=!W5%CFI3?3luZ2*3{
z_8GH@E&X!vn5eB*pd!Sk&RX;D>+FJ^y5n*;q)=wC(gB~iTQ78RrAIy3EBn6(VKsxg
zhb4gAsxPGBYbchhiczf=giUlvVv3f)ccMR_ourIqHP)V+@8;o;rNeq7qtZ7f#s8#|
zQ`1y|BZVv2v1;g%znGM_WWk>>9nHa14WlG~QFQy|d@{^n@zKHW4Nr&mjEE5VH28&)
zyk)=epMlMDj+A70M6_!LtiIIi?_;q~(&6>yG^-L%C5ULEK^cBW1A$7l%zx6~w8d+`
zjM=Z<l6Bay`{Z(FENe~Vfi-?;Gh!(tKdr?;I06>*mW1?WtUP*l(9s>Vn!-q1HYJ*9
z{3<u`W8{ts=hkoaw;L!it5-CzV7AWB+fC=%EFZh@Z&{Ld*XATN-)cRT9!4mW;RhFC
zY`iF&U5kVk%ZJ<gJsuV$6x|rVfAAl?L4}94hu)P>kk>lN+)!BY04$#wF9S06;_DRi
znI1g3G2=27R#z4b6wI@ou0Pkchz}*<NwBu!IX*}zdStJzwj7qA{cE!Lh4KD=tIv*x
ziZkEqK_jiY)tb$ZN$1oVKDAczlALQ9$-~frTm5p|y@f{lree?H6z8601?Q3W@s^yL
z8Q-vOfgg`7go6ipol@T1%pfv<DgHKC2W)nmJ3j&MrhWz{-J$<*3zMx4&zbJSYBenB
z^evaQ?snEA>S>*`4Dv@v=zqF72NwaV1+sG~6HxT4N#Xe3Qo~$YDNwC!T-eDRK}l5D
z?^za%jkU(t$}ZnfYbq)I7}Nh&R;-k;)V#hhyMKW1cbV_cZp)&@X+q+<nkItIqtII#
zsnWJf9QzC9ehM-b6^|czX<doa`+q1qQtWLdvWwho9U&I@8W(3GAvB47H*!>{rLJru
z_<^s140(~K-|lvwcU8&!im1~>gWtd%)|cWNs^y`Rm5|m?sn)9xkd2@#bm1>ad*d=q
z!A>3sZxGW8Pq`L7_4SE?UG=b86n&8qh!{xvP3v(ED5$zz=b@+cE(D-4407*{P3WW+
zSb_Z)vSFY|k1yu>RH+(tG$AlHqw5kkg;uVH#NV2o3A0zstZ%O2uMbI3%QeI^Ze8m8
z6EThUFB6!f|6sOsyJsLCt-Z;Jxuy|Z-EO<(9MhA~|6<G(r~>L$LRq!q*EC1_aGmLu
zl)2NbjQqx#8wvJrv}>$f{>pSUNsIB9<MmU;3+m>uK0{@Z{+99m6;-p{)P<7dRyiym
z0d|i8@_4-X=oQg!;WP30fU>t@_>Y+sQK7e8>9fgwZak@GS*>c0yNNZUrOXD}^+bcc
zenk;{@Y+vt&eFglElyAlll)A3k%WM;_8oWQ3!(CLA4V@1AH2+-pv40WvO>a~ucCE)
zP1QI}xI7Fg4`j2LE=unGG+kddb8e#R2pAsJzlQBaa~LdY=;<u*y1h-WRd3igUR}vm
zPS#Yjps_+*v1#_tPyYxs9|v<~N)#k$X$?d3zK43%SKq!mW|B7XF)EQwD57#Un9w(-
zf0OEhN*vxs*Vj%dm@=g;`ePn2F7a*?DHvz}R?`?Qh^Wa24DXat#l;Sn!lOtyQjL{F
z#fQAoIG87S1|bwK^;mZ~L$l{@#$in(n9aq0b^(T6!uaUY5b|c-JlEBq-(&`1F|)@O
z4r{ifRRb(c3!f&!{FWv&eQ2)J_W2<`!I1$>x{1tknxWO&!myEovEKrnW(KsBe!N}5
zok(-3TtVS*;GQv^Ra@H^jj0@jAtyX*udY3dH`O9tXl|(e7mOu*6p>>#qR{k;ZFXEB
zxu2lac#u}|Xe}d`h|Uk*LiZ(uWI|bMep55E`vmT?(%Rx54*HFHtW5gGkx(bPbWGw0
z#eMCSUWFA6G>Z-L?)H4j_TNjvyZHwz$Sbq!MA^WZFh8e2`DyJydbgqJ)dIZD`WI4-
zYkSrO;bA7thmKk#-IY14t2_l}%P$H#OzXtC++AXcJVt?!^qQ%v`!5{!4I4xhY$`RZ
zp*A@H3FJh8%GNOCcgDVEd8j`A62t*EH7O_5bM0eor-R4(!`(Pu=-c)RbLYU%FTrr%
zE@}L-(FvK6pA(z}5K|ZOXQPey`dr_<Hegp}Ns*BoGj`9qD}NgTNh!4qyznR`+Ju{k
zPX-}Id;}Q#*Ud6zR%L=3K|%__-t*1KjDyzUbPUCKXc0H*$)LeVA+v&^QQM3#w!0_C
zSm~ZXfBOf(uz%=Y@W@&oQ^`fc`f-h)u9LbJjv8aUe@u_}+o!>PDondMNH-Lv=oEj`
zP%tQMOw~e0Yr(P%QPZB{Z%3S|55vta9P`_cHdsD2KbuA!DeW{Z-yp`xUpt(>m@^}-
zlyKdto@MuXMEKY<NTH1@`?m#`ieJ%_EbUY2jTzhMavy3U8iuEZtHxo|U}?XSj0t)r
zfkW-_0qVK(Gu`}pKxhyX2!6JHCpO+HC2u^Rt9~TK&s>@PkUZqu*0s!aYzM7qA$ojL
zN4sH|wH6&BV>$&qu}hm>4(*FrtV;?Wd_i@s`V)O_BYoKL-wj5L46@tS9pHttW7A^}
z$Gj-iasdh(CbXgw*^{s=;UuWfp1fINk@S698?#C4R8eZX+)1*uUA8Y~RUUNlblVne
z%*LL;b9FeeCbX!WJo#DooKghQfUd9`$ODq|X=b9n-=(mst>O>bzkaKTI~A|OOMIrB
zw(dz+m0?2cgMWY%2cf9U=%}@MEuK}#xm3Mg_sfj%SJHwo2Y&B{i5G#V1V}FV1>G5-
zc7ABXW|E8gl@^o7AXyMz(Ct=t(PL2DPI=EV0!uI%y7nt5$rseNV54tmbrNVi`O>>w
ze+lP$x!G_yARya246o+y>=qf?uZ488w1$1$*jD#$tyh7lJNL1mXG)V>6~icLu|{5^
zN=B9X&ll`J9JUx9Udle|VDVhzjX!vrtvwlMFoS#gjLC8LptcpVLb~d`&Av9mWDDPL
zgQs{N2sp$KtRAO$f;hl`D&v{@oJ)%71^An=9D7C?Rc!-h76tLL%yk%ni@mlGjxeGP
zQhMiNqMp7IKIv*}>rwL44Y;dy0sGG{%xj#uyu8m8x71lz^nXRg3{eSda5^iS3<xyZ
zFDCjqi})*Icz08loJwO4Z?O3=OWM<+tQ1>Da~if;o1A9Bhj(%X1pTGAa{k<JoSSxW
z`L*hS9p0)=!BFW{lv1j5A>qOF2(7|L8SM8w^V78fs4EsOt%*|sW<Y?=@)BEw33I^5
znLttaEuR*H@U&}7(P(L?hyo5Gy&EQHb|$0_6XaaJanF)|FkJNeQvY~DXK`IPv~HhX
zTA+{JyWxfNx?BsRpSk20p6U)JG~L)3W$Q`&ZnL^@*xCrx-gxg*0PHp5wOsY&sa@_c
z-(dv0vI)MUcp{_xr$$Y&C4qdHXSDR>ml$VL!PA}WmKW~H>6$59+&hW!J6Kyu;G^ee
zEOQeFXirm_>Kg4Ca@*5~nAM{;(c~<7sDFV+ba-Sk^NG2&HTYM8^l_8vsv3g?rkn$w
zTY^cdxE2C;@>)bvu<wToxLTNCycaZ}%-W21p6~u@ElY%D43a2Jx66)|dc#w^SZsot
zQ?X_bzJN?mOA^e@!@i!hYeK0`hbdi=8MypsUi1cGL)xvNk|CdQcU<`ULGzXMa(iL#
zmGr3Em6ShDX>^9kiXpbHg~Sf@77lX5>;t~VnE1i!*OI*Dd<W@S(l!#uBoj2C{+;Fj
zKIen;R0d^<JBw@zF!+)?r6~p!qt8FB*^K#R(=yxM8U?O={T-dprKwVAkO!g_|E2w?
zE+ROm1j4$Vc?d$z>DDL<cU~bf@XI^m!s7QwPQA*R?(|rD6K%d-pkCetpX<+5GX^nt
zJa}1y%;StpeZlfhElrhej2=Y?TfQ4Xh7H*pa^L{Z382raRu59}H`ovUvuUM#F$l`Q
zG-m(3_%keI{=>@*VVmWXpcsG~mQIW7FY_;5#)IO@Khl6oF`6I99;^!m`<qSK`8{W+
z^2gv2gg#~X2aqBSgQuyCsPaz$A*)*WM}Rv4%ds201G;e@pmCczRR36$HnYk7x}NpR
z@d{2!TuIx%hrGbOei+<Y%JP!NGx={eotBo9rmarTXZQTFp{LDpb2PyO>d$$db>@O}
zg55@s%*N89aIm2FpR0?(dQxT6Q!j9PKhfnbz39m`JZ??N!_bj>a$}edJgogMb@GqS
zL@h%SRLB#GPpxRhUW>Z&<cBVl{0CLEpPAl{Om3o)7X98of@JDQPHBcOwKk_I$r$;5
z>nG#v1AGIEL*=~CyCJFC72P*)BW#Ph5Q(Mq9yoYALW!W<2<&DnE>kUl*Zg6j^;g8@
zXaFI6JV9Oa@!Ya3MyvAq?m=4H+omg%6!kiPWg^cJEUU~;t)sL2i7DfjpeI~c_NEiW
z!m`5^2NxtG_%++277a=;iHE;S`m<T1pkB7(ec?wxBIS|*pPpc+mYY!d6)OSLkbs3H
zO?&76{)BE7I7=B$SLc#EQG{0me#apkl&|wtI_8C!1s?bhQ`zdy%BruYpurLvI=h>&
zweYj3!5qIl4?feX#iI~O)y&s!!z6<?0B$EfsAiRR^B$cvAEx<JA-qWZ+)nEoB0Yhx
zhk!(;=7q@q-$^s?c-DxK48_M)*HSarZ)yUj3DXieZ;IK4_Wl7hqP)jvF7A~;X@9Py
z3g+4D+g!|gz)qicMid_Ivpz9ctblRn!%lsJQ8TalYVcFT^%WM1Mx~^>3Z>Q6O+cN4
zf+$Dg3gz^Jrb)5&P9E+S*&QULZTR>w+1y?Wh*6>v;vT7_;hZh0!rOll8A_@q$<|U$
z$N_m-@eeTJ6gna3!-|M;$q@`vs^_Q*O=^>aT5yRnz9vSA4rVnNhcHy7y<U4=`t!;J
zN3f+vIQUW#mAWez1>yDU^!sfXO8Fs(KQ__zDJ$mUtN0f9w2J_Ymh6Y{ClH%$4L;RF
zh2(z|fNLuXn$vbp5?aXInw$yCX49Z82N>M8V}IK{n%#wKD6-W_&^XR)jtZN!3YS&X
z6hCUK$o(yEB7|Q}NUQ1DJuz5NON;k|kFp}7q2;=G@4qMkh=K$s?~t7muCs93&@V7t
zfasp)okjVn1rO}F4QzaF+li09$faMb*d-V=Hxzdb2}Y#z7q8ZyZY8V{gufOY>l2l&
z*G)9>I4;Zd&z2fi43R(f^&NHpT0R(sGkgaaNIh4N3Q*}?XyZTf<94TfsmW#@Il(pS
zn=yl8!*#^4wG~jRg6K$~XAAosWx}^o)#dc&T~4{qvNitzhPNDU`$tb9mS(FDq{M*@
z9JKHs-sk=?OlKiy*DpQ^ggxN_n4m?zd0`BKQ81@efKQR1zz6k;;c&(t(iMqQ(o!%R
zG>^f;!#AamZ^CmfzBmsxd7Ysv*?|)oTCzaEX<{29M5;2WmC-53eG|re*ZG-u^FOx)
zPbN|AF&UiSo6y;XX=^%pR2hg8wi~1Ybw!}(B0r?E|C_fK){Bt`3?>KR&pJCXk_&Q5
z4ht%N+zVI5y`*9w<!SKwCO(Qat|dxMi`B30PHV|xg4uJrZp`jsej*o39^zfc?JGwT
zI9OIKz2U`M3tI9kAOMc#aBcz5%50K}#rttx{{RPu<tZAWtn^oSdB!_E?8Oa%e+<is
zYYi$-Gd#%W{PcU@Wv6<in#fDqTEE<sFD%UHzG%czjB*ElVW)v#<iGSI744agsmArx
z{0dsGB2`6nE#<s{2Q?wY3@lmr%p_JHhLRI$Zuv3qOZ#s3#9XJw^M8&!^>$#urxNm(
zq+6_<MF|i*Ach&QDWDEagb9<B|4eHJJX+iA_fN&G0{CSr>I6la+n1+xy9Q{vyqGos
zLR~L&UKS3!(T88BDvd=NX+jnEWA<;%$x-PgaaHoMn$X!Tb%5ZnvfjH6o36vS*%`0e
zZJzH#H5{$<em{W@!IbTc_@|RRp!Yz}&Xds^D@n$$_*6k0VIc%(`X(Y8t74p5azIzo
zI^g#_-CoA9iocOp{PnL>YLwB|bxpKAAb7;sU!m-<H=qKR2$wl2a5=(&fAeFo{oxDe
zoTwq8#a*z{V*@d-2602DqS{4P&j%??kWJFNnnA6*8b#vRp@0SZ-1q0tkdNwTb!;<I
zT*@8zu4ksD*7N3F+w)nMR>Pi3bAO!vMziLw5VJ4z#MCQDUD)Rs@0V||4Y<45+w59$
z_sa(d<kpHSw^JK2NvN}Ng6n(Z)VboCGr2SpliY6ykL{{A7Z0JJ)Z+pVHjC2K?40r(
z{?`4hb>Y{v@|u`>uT=sZCF=dFN*fo7z+zNQRv_;`0OzE1bvyq~W@~LLZ0A0+?H(xo
zW%&=VS$i}`!AwG-VPm~=eEG<?I?muNnH5U!`DwyG05G_chbf5KyalOMD-Qi3Z(cmj
zz=*1-3GhudD>9rDKIIq6JV`-T82Xgz*Ev)?oyxMjNn*1w|E|E<t&kmLL-Y3)7GY78
zes<K5;Tfx$pL8W&&ua`=jck-`n3u<uPc6<kTF!kOzBaYJoVB*QakH2CZEZZvgRo4q
zFmU__=x+Nn(tBc2gqk<Ui4*QVd_n?Ar~5Tt&B6b{7L_Z-F_@;JL9M)*&GNpn*~%ya
zS3XLD%93JZv3khEo#ALpO05YVeXs?N%?$Lq`U@6T!0*1PAsseTOD<AZi2&CdjV>7<
z=u0RV>Y<kYh*pyvd{j8cAw&@eLMc`<cEcnm4n77F6?at6Gp<)eU)agJ$rt&=RFBqW
zyUhtDPj00$I8&RM*xPg_X|V>){A#Kq2sii;W;wLZ2>kC?FyN4Q|C@%m=2)Pa|Kr4&
zg<vF{w}$R{C#LfIi7aDDl~+Agk1HA&tbk&U$6VoRK2YwpV@vjeEx5N|%}3iVOx&8x
zH-u#P>|Y*=>bt)S@Cm3+;1hup`K5f-3(6!r$elir$>u3~hKb}75^U?*?8SDd_sB6_
zZ#2D6=IJZgW%s%?7S4)!b&)Yh{14DcS^6ot=5fx(8^OeOX<*@CyNzE^igf!hlLnFx
z=)j)C^g&9zu%bwF4eFJFfy^k3iJL^!KPoGdG|{oNvy8TI{gw76r|-nruJ}2<?L)Y}
z`{l(&TB~c^_qur``aj!|K`uAu!#UWXz;lEvUD@?_rnh|8=p{!{D3;CqMP;|XihS|*
zn!ST9*c@&dq#63SKt0gkmLi%KPwzR+APpPV=kZkhmu|}d2FnWqB)l%2#(lr?7*@l8
zsxtI1=4836l=P<k62L0XS0uQ*nyVle$*WacU@UH}^yi)P@iUk~u5cjn@t}OON|sjc
zwcwe)AC6aCf+H~SUZ{s#x4H^Woh9>Z(#6#|*<4qxPsnQMwA-HX>sLRCXBNWdN*Aq(
zRYMp=tKX8C(7l@Boh5q``p}OA-j)6{Xxq64yt!9UV$+k|VaqfGsAw;uXd{Z{NYI6W
zOp1fHK4E;rXj{m5{K^mV;J1iJwr{}3Wp1zpXt>mrs%-iDeg(6ZHU5|tG7axCI|*$F
z1f?OI@=c(W=*f7zV2a3-<E&*!{=?L%_v<e=zw6w<pt7BGYMF<h-P76=DHPxlgPKfe
z3}JV>?DPqPhW^^z%g#n&jN;VJ+f349ywNpOOK+++qb)-3(@I#JZ0nehhydriOYKqc
zTG+Jip9SAnTJp|m$+wxtllax~G%?s}3Um2p6BD}LI4|Qt;QRD2;l9YqA{)O=aNlG2
zhd5_YI25)Njr?1;$b-r2YG1VALM;A1et5Tpe$x1r$2sWBL3K#CQ|nVk-0NKF4Gt`z
zxW(8a^9uV;rxUKu(Zv+3W}lQzS7L_8<tar?iuqYnfncU>N=Ku+p5Z-W7%hz`+VWxN
zU+Ln0P5D{kOC8iapH)L)w9A|Q;ChE*LoK>2I96~lCAgLcxzCu)l{K@P?YNl`k*)Hr
zEZyBoIGpmyFvLEqc&QV#najYN73O}<Y<kIR_l92bOD6*#ch<$kPS<}tF2q&oB40fH
zYcUvcvoYr`EE`Xu(nYh`l(s)B`L3|n$(XZ#T(B;EaBIBk34Mcp#%EG1MK^}&C0l6d
z*QuuU`u^&JxRsVc3z43;-&S|hrTI>MsTSgQh8+C*l>c+D_+)oTvXft$HT5^>3C~27
z7lD1W>Y(|;WL%FDDzRwnd74P<tNP3L%q8b<v8X)_yk)u4hKeGJw`FmL9XqYV?0a*k
z7Tqa6v0potyeWht>n$gE@+RRZ-46)7BjliA)kt%^8xsi*(?M@#u1#DDf<w9cOLwzw
z-w9;4+6argWdj7|eD<mk<&#irpU$#J&X)hFM(l`lCu{nko;YgkGB*~9C>Ww`4t%0o
zo*tgwOGGj>qu?v{%@)*z8_)9~$C6%dY5wvWB6>lxV7WXd0u=YsFG;M?_!$ZKcV~Ei
zUAz61TR@O351gCt>W<2+@0+Cv8`<dnQYK=Mi!Z2u2s$uj_ik5M+Qk&=VExeKk3!r>
zjZr|CJeUNv;PgaJGnL5K=X8Mnay7l0=f01;6XsZmK6bcHHRJJfFbsNr7Cohs(7JjQ
zK|h5|X(geblic<5Ff3M{O_ic8y5mj1Z<tQqWzBvte4aQo>T94&yV)IsR*-S3$57<3
zX77dxMeYkx2vJE0MY}n~_H$YoN2Of+9G%R~s=j94SJb!MdrC&s>iFQ9e!9Lro}uDx
z>wnS>zB||>R5=i+^-Fs5!++pc2GP}+&FtX5&5HY`xBDsxh0r)iqz{4_+Dre|BBmzN
zjxBL7CG?zD6WrpQ*R+}8oAHl{an$M`#cvn5)bD)tSf!45S{ALgM=^t%wJt274*Ob8
z$kJxs5Pcxc``ukOt@j;|Y3@#|ed_tM;saK(`RQ7%UWpxeJ286Z^U@8A=wqrc;mRsY
z(lwc}YroaS=yV2XhXh#P{=sFxN4$@&a1)xKqqH|5YXt88*3pP$n~5E{na&J2;Sh8Q
zo{*2%1eXvDs&Fg7$Dn<Js}Fo~USh7P?8zYdvWnyUz<FM~UNaNSD08}=gjhdu{TGy_
z8|)$P{fplE+xb;Fk5EjSb54Ek8;{UVkKs>_lI@O5h~H!RM*k9v$7c)G0p~}W7yA@3
z!g928;Y|lyS*+<Hzs4lWYG>>VKQ5fRb#$b`o35Tp<A2P#RXp>{t2#R)qst$;5(mw$
zzd5Tfu0EubE^`Ry;2ZQNp-5d|wWy6`(=0Eoo<X1frGG!f*!>H8Q$q_w?w|$`mQ*h9
zSl`Q5=?qX4agO>M2u*X8t0e^Yy2wwz5(Y?DVO+PPt2LN9+4%{}?wdC_DV(3iQXVn`
z{4@id|LJJ=&scA;^)Txsh<wuLTGIdasAPo`8@H2?H0I#MzP=d)5f0m+L=2kmqfmMZ
z`7b4(5E>r^L18HJ_(Op(&3Jts{|6cAg6TfFPf}0pUvCI(ch<PBT-b_Cj#Z@_-8?+O
zcx2ON>g)T+7*VNnO6O{K@RApC_T1}xH(j`>=4dmMw2yH~{17+G>mkqR<PUw&SiuPH
z+U1`vE-fZxQk3a?{?yAdd<P4qE71qbQLq&>a1?;BL^E-2B&@v%;H3zrO*dPu?<ekD
zICxWnZRvVpXpnKEnQOe+V_)3Rc(D^48Wn&`p4d!x3tf49JRE0i_*j$-37{TSP~}};
zl+(hM-S%GRTraPvS&a^<hq-}s3bt8Yd`t37=Xly$lbT#MEMjiu&K`Y4B}a$E?Q#Zc
zLTUCrN=m0ircZHV3zU*02i<0l`1F+IzpvMP-Yt>z&&_zX*H6>Z3lailsmdWgn1~C?
z`RXi$mqlTdW=n2kjx5?MspRbDq~Zmr949)*dRlXtNoS0Rd*{@X#1lG4ms1X?E9PsE
z241zTQuBa|Q^EB6e*mOamHijAjqv+U8PJqB;3sNlIdpD5;iZIjd+^A0Lqhf1a42am
zF=Yj`DR(np)6-}&<XomoRB}QgiHSR(oac#5qjO<qWW^B8W+_de29+k~pq3^=>js|6
zpD-lQ_hQD<B0F^hWy6vha9~kZcdK93Bkbg#?Pf{+bH^`6uUKQ2xB0%Wl$l8_*VrPz
z`M=z#y43Ghwfe(guXl%*1S#8sre<Bl`cjDNtk<QG=GV;kg?fg4zdR{AMYh&HT}9N2
z1P%Sv>&=?~LFu0cZ~VCOH<Z`ox80!7HfX4}nKV<_DQ<GJ$TF9ZBk9E0dbCA^JHANp
za+-zc7!85IN#89`ie%7x<S5(lI%4-J_Unm{XQigT5PG;r`obL4t|g~0qNs~O0xbDW
zr@p4Q%=^&_<byn~A(!H(zUC;gxGr2TuF*>PTb`x^aiNR(UzQS@B%nlU@aDVWzcY!k
z)KLx55N^HrP6a4?z5o0S#kk&U5?=B)lpcU_f_fke+C^Mq9I!v(te^s5isMdw;Jtc@
zm<#T5E;j694C|sOcn@ruf>;I|LF3prIJnGJ!oLU>_+(o)ZNal-<XK(~%h1<OSYZuD
zT~#8196SjoJpTX{3xyJr#;3|9GrYF&Fs+U03o%biPilW2mw$j|A1udw(Bw~{cp3oe
zFZq!;1?@jT>%ve>?RN3Jg{Cy#g#X$3-H&%>caNXVzH*)p60{k<<S`o(lncFNb&|Hu
z+EnK(OYa1(Yl2^Y#dMJycxF2qJUSkv0MW4G__`lK$+yDC5U2D3w}d>NYXp>tIZJ)2
zX}uqs%$rRgGrLeLCQ9{RKo*8QuW||VKg~)yJUj0-q^=EqQ8YZ=T=T=Y0bVFNe<7E7
z+9mkh{JqY_#oZ6pR3jFhH;zZ&YrQI^ER)5c&n-~1h9O;=J{&?>LR8AQ70=Yv7jt<n
zyl_`v*AT(3M>ZbIrd~JXk175ER>h$s_)S>bW%YwEA^S`>R?0G^FN`jmRvHicsES}l
zYX`8U?syn-jnOYKPfeT?u2#T;R1LZ+T@Dd}l(81sfOgWvZN4!6*`l~L-6EIn8#MJs
zRRpe6IQO|D#!U0@5`}Z62lCYa_Sdp54z$Nv62NQVIB6_NIB~<_x=h)Kqq+tPo8>2w
zI|TarwHI%Y!HJrR)i+bPP8#%`y`u1FD#p8Obf+x$%J@-`>%sb0q9C$T;p5r0Xx|`j
ziX?x3VO4dmbxkwWqx?1^oW=7u6H@J2eoZslFY*`}jX25wP_DU`Ah-ZJ0}nURx7uuj
z6Ifyf{*-mW`9+oXgDi1ga%yrheR1c^;aN)qZce{iIjj1yu(Dc8RAjb*(5JsO@ANN|
zB6b#^#g%GFRCP0>%U26?w-nt!P)e#YQQpiVfz64pUV1nA2iP0JX8)R*+1L_2og$mA
z^s}`SYBF7{9Zco#r~hfS>G_)EW;Xm%fL{G=Y-f#dR?cIQrz8F)UK+FgJMgv)hQF-w
za+rE{obJ2Vo{>t?=;KhCjoI6HIS7MHhw6mGK${O#5vz~^zUsSezuCplKbGzorzS1+
zt@6nTdbF)Bh&<&Pgz>k@y-O(emdDQRq)=|Ig5NMRmn=Wl9_z3wx5HX0bgpCxsjb?Y
z(6;Ar@#auSyY%>(wT-vo=F;K`CV>jsl7n*0L47E}P0vjx+SJ)-(=9~F&p5Y_i5diy
zFePoqV_xckr2cZ}w)B0eCV-h#s>qD%^M3#+kG&kh4<z?sizD`2g@W-iWs5>p>5_I5
z=lFZGfu~26RwB@IJOr?1Tf&-#=?TOuad&{t67dq~qV0KX{R}*Ne3G2iCHM(%XDWH?
zJa3}O%p8{E{_NC}L0wp`%IG5Pf4u2P>Samd_bG6Ji?@9d@_8o!dAkTg@6?w6M=Bvx
z%VDxwM>V>W*b41Vl#}9sP7QmLHD_P(bihPy0_lIIbm$S?>-h2gm)~&A>9S~GfO&kl
z6xgBLICV$7z)?aAJ}ir0H)+5d)<@qcxICm&bYnyl$-5Qkkdfu(L?-1^GKSpFT~=ac
z>Yx{y=k8s{+o@>t(n|uqRD>7(V9<Uu08>55<2uf?$sRlYt@7&9kOvjk6nrG6UTq~y
zPn5L!9NMI>()X8*RoGUmBu?#>cKN+Di(^h%q(>+E^Nf+g+pn5|>;l%G<X^{&9KjR*
z=1QNKs1ubniN|%`8r^MA9IA02L8#i=CdzYzY*AE-!U7ypWm&-%EY$VE6?XSZMq74`
z=n_gN63`6U=EwINsH*2}I)&wXhMGUyuh+81L9u$}*sj`tfItSoh0v~{?oUP*{eub0
zRN_e6Uq0<o4%Tn1I_QM(I6?5ugVqezHTLw6?@XEn^V}0wuP7;53SEUpO54v2=})!y
z=6RqDF#T>ZezZnEa?Z4G+aTCnK-{$-SHeO~ZMGd%#+?^*ldg_OfLAyS=a%fCB-Ze@
z<j>U&zWKO6hT-AY1plcJ{SO|Jq_mcxE=y!(C(j9b0(_buanXW@>=>!xO%dtAm3or;
zUZFcFu=7@{!Xu;#Q<R^eSxp~&B2|!s-XiW+Wt7$6Xdw){Sq-kJd<(fIO)b;NKY4Bx
zABRL567W#y^?xIhksF4R$_tPX7XW_SH$IG;P-5pJel;6V($XHei*QNP(0*J#+sM_@
z5xJYm&HqiivG{%|D;gD><{ygix^<W+b6y&z&^Lc*BHU&Y`O0{XJQq2ashB?77x%At
z?X=SrQ~mM%r?dIGJaMfd?*AZs-CylByTf2vWRs8|C)P)s(;5$!gPT3Fw>?&$y*?Mo
zr~#uJj@(SJ&-3IHtR=J?;r{>z3O+IWe%3NBIcZ%yQ68xl^LMG|k226_=wp$`=E8Vx
zliXdXG`WHM5`V9i+0@H&4z4x52BsjHDtw>@O$a7vuAuYMux}Fy!kYbT%iEiE^#zl1
zGK0d6yOFoEZ-nl<`iirHgo((E7Gs%V$o@8|_BRq#UGKb~Bc<Ii2dcqD_kG2G>WK(V
z7=^0fYYisOIjWATx{zdc8@W)2Ky0QU{O_Da&}#8A-cf=%O0kwOUF3KMaDp1AF&fxz
zo}kf0h|F3A;Dv4Wb`orxuBy`-gN4~x5pa&&pM##X?5nG{Cd8b-cX}lY*lTpK8!T}w
zyP`sicr82GCtC=6`v|ZRA#{|}tl(F9sa8Tbo7t}hY9^Q(K4<?y)naAfDmK`j9B;R~
z?%V<mJ~rIXvbWc%Z41JaM=HIxIHzM(pq^`slNhqdl3asE+00&`&e6F7d4U_L_o`g8
zTjh`E>Nkjrx^}o0l%%`jOc9Mc(^u!!4Dc+nfHcSo=&%;M(O57|1|%GW=7$&lcVztR
zB9Alz6Ql3L&^=#-{K6@$v?@90@cNbMYpl6|dbQpn>Ul|z#@LaQ>CvzT#<^2OjBL*4
z^GVX3=C*RkgB8=ZXPVbFRviT_4b)#>KCVtz5uYF&`JOeT*I6dwEU1htrL_93HlQ_R
z2mxBMP5Jq-G-G2)(dVqB4JDoxMtfU?W`SDalm+C4)qWM!m~p|X=YOw@ufxNAFl1+M
zmp1deC^p+;s)}BBOdaub_b6Qo|BnO<qnjJA=xJ}wU?r-t(kB*%7FkA%i-?(i-M~K@
z-|zV`mlEjy3<xdL>IZX^A3VEAy)3)>RMQ<^0KL)C&1>%mzr}_QQe%g5=2Bb+wQ67e
zMDaG$cG<8CdMbNh-9NPOGTd)5j)C9CoJ(wL&9+==v!MI(Yy9-FKbT^QKC!K(_)o1H
zkwy%XrS$w;SCwbC@B&LpiQgw8zJl3?2ofJ;nT>5fq<i8au{rZ*<*GEN#l7OAqPX_E
zcKx$r^s_~_6J|clHN*NQ#k&v`F=eU*Y3(+d=0|dR<X$vw0X(SSQh%7a(FLWH?x<;J
z(S$%n<*Zy)3@KuGc@WUtH%<w+#-j3Y_lem$0Yp4&f{tc_9s)M3fvSs1F|ngD4_2Ii
zGP4-k!#C?|Br-;sNfP?|X=jEPkG1|oUPQqIA6#rnjX5((?}EUs;jK{1!jr98;qMwN
zEt~}1`}?8(6u#Z!9y7RY5XtwCZ+jLRfeIb84@ZZAXK%`HWqe8&hY4v#Z=Syc&D`?s
z|5Tu!;C%gPmH7U|W+{;-cZ+wZp->}nDiZ_bFN;J0`-C>#3Ch6rAHX{ZXB;=@@=>3_
z^_i#F&NAuaYD6=6`X2M_uGfi4T_JV)Mgme7)W0HPgvo-X3!3Y94l4c9<q^W~2@;2L
z?M?>}eI<Omc#{on704N92_6I?-Y&YbWRJQhXwPzeYPY)2U4ovW4?(ULGTPPOpD1j*
z{B?e5bz)P`V-$Zk<^JkRygW<0#7Abmayw#tfxSB_ji`}Ef+4SNy1HCt1j2Y1#1f?V
zpILM47kD8ekS656A2I&`X8#k8FyGZRf2w^#fB&!4@;UU`G+)>I2RKr^u^9XZs5xwo
z>i$0f#6Ua0^CzhdZ}k5F%+0F&N-JR*Q5z0__f%5bKK2+m9OjlC-xokh9w_IMS8^m(
zUOZQI9N;E+5trKpoN^5by`;Dycoa|x`@AyYPnu(=vpHqK&)q8uExk75{-YA-rslgH
z<j{Z9VMkyIUMq;$$RU*`Q}PFz>pD4IPfms1v$cA!6a7(v?u7gvqtNmyaS^W`*cFYp
zX0Cla`9ma(cOGi6+C=l=Ye%Nr+uK=|eoiaMXOvWH$>`$mf>U{FSjIw@r(2syLpa7w
zQ%!1(a%S9DCxKR)eUu<(M<jl09N?1dxl$W(Wd@}v_XgDmfmYU<To&;Ht&C95Zmlpd
zwg97!*y6*!NflJ)+*EuG=F<m)`WbHC)f^JN)mEqJ!*{8z&$w_8RA!xF9Cwd89$K$p
zL<D620L@Qx+IIk}Z-X^|WT=7_{{WgZPPcn|d@GYiYcVO2N)Tx+SC9`zmE>|kZKPMr
zaLVL{!J?v#u3^jbQR;R%xJHYCo@&xHNG-bK6eYD)5z5hB6jDbn!JV}<gJ8zt%}4vh
zlYY*{QeEm!m&$+>%|!`G(jE!E%j0GWdymQSLrob_NL-()mTTr)F)lb!QE6J77lfF`
z;ya+DD!L>4D}U%ld!cmlls{RkNUiP^g^o@IKJjFf;WD)7UR!v%<kL$enpR6~b}~yg
z&iM}N8atFX1~vm|@kGIV*KA@aSM1cXErq)7lW0F?m!6mt+t@Ar-L<SDN8o)5h0+a1
z#{iNW>WI0H*7-K7t1pU{*1>IH!hgjwlv3leHVdcOiwHJHz@b_ncTo=bCak2MG~!c?
z)R998{{SE(J&Ip)S8Qb(Rx;)i&=fyaU#B<i?tOp`PnxuX_7|AI_{jUF%&{fFkf;al
zn{_H>Yj;c@{w(Fbs;&#7w_tL>RkiSag|qQBSNqF2r4a=Anz9PlM1r9Eo48gmcr1KX
zYopvg_V};WTf-fzL>a<^`Jr!D2t-3aSg97H&=(v7tQQX+AbRmbZo8%VFz+<WX>K4=
z4;iUY{^_>La4D^zu_Bcx`E8)@TMw~8qvv~DVYD)>Mj#g}xDk)KF_KJ2_Ekq|wqJW0
zrM^#fLv&Y>N8EpfcJ>88sBUtxj(pX-+vG7<*hZ53puqr|3gMm`v-L;YM7B}lR>3sQ
zN&zHdMa3i~@+9d#45qPY*B3?KkZF-dkjJn#D(dPb5R!YvQUDd$lbYzlTcbrO)Q<$(
zcMfV;kjyijVypf460ij9rILBLTX1patZ-rMD(VcO%?9x^+n9#~@m6-5UN7<-wmj5R
z1yDfj^+FSIbipX5OQNBQ+moDA<B-jCc)%4cf&{lAxF&{T(OeC^9_qW@CdPblS8A5*
z*M*7Vg|oDsbul4-@t+lGs1@*ero^mJFyj@XIU0zLDTUm}GK?wX6%+^C7@M!Uit72{
z+N^V$onaR-0$2cQu2Scr=vl5raFcGOkK&!CU71(@<p@8TaGKW6Ewb$&nrw4hPs}VS
z9W~efg*xE*`r<h9NL`|<=1r}|To~C-7-Sk2@&FD%z^}1NC(6(S3<Ku0KAqO?bgSf^
zCOdP;sun=3F~tyv3-RugYLaP;n~St}&rWMlYgUUaZavI$xvO~c3C{+&52rQDogV!s
zjmS*=mo?gbH?3Q1HtQ@744CC}&2V*kM~T|#>*@6iBB$AINd}0wW@e^s&m>^ef)yDD
zn#5{sHYdwtG<~rtf!=DO2?u9`RyMP9$)E;UsK+C!CfWD=Pa><m;yE0OAz6Y4kGd&>
zg5-y3G%UeGWS(fKCo&8*4{-oz+zk)`&lcv~z14=3btSui{{Y6L5oBhSp$<66BBEqy
z)LY7ursX-!OsB<WwELI0eXM>pp|_D_K+Dc(h}9D>Sf^n?>Y521QnQV?^H2g7<0Fa-
zRMeobw0oO}2#PbwsP(N*+fB23i-{EijEtV^=daXXUES+fQd?WOOLaH^9_s8kWtv-J
zEasrClK!Utpxo;BPibc5EtKPrHO*!J04#x+l1b*KRFR)2oY$sSSwajq^<IZhr;aGn
zIazYcBeo^opaES|BNbC~AGXc7U{<=?+DR=pj2{@PU0Tjf$8r#BM50ugM!gaZZNE%C
zZ-41MRyOw;t(f7Eo+}hlq9;Fe;CEuPWX7hcmPT$a(f41ay+LQ{Eha15IaRj|um`HX
zjQj!fUm1RudiwX$nyhH@mb7vMDLq%#zM0qTb=@{A%PCdEfJg_b;c75EQvDq~SrpND
z%y`?~NPtMWJ<#EZIRc%MPFIS~*=-MzcK-lX41}*IswbHjkNB$XR^VH}#i%zyMHi{{
z6}OGHw>YkOb8`202Ya2Q{ndx5^&fU;JH8^RG<jpY-y*3YPAi$N$m5N)ZqmUyBFfn#
zUKWc>VBh5IR90)fxZ<qMs|fNlUV5bYI_{SQHoBeFqkCx~DgOYrv|4rg+@K&~yo$=Y
z2yO#EKZ>-|r;ZD>Vf<TyE2E&*$}-z0!IM*`i&mE=gzi0bX>e42Y4gTo3d5M&kZFVE
z=Zf?PD(q^4?UTT%f`?$EfGMPIUIqZEVO;((Qpm+rE8R-J$MaH!_+Zr^sx|8!0>S3F
zkOl+dx$cVP(g2>N*KG7HIqvSH0I1<}-F&V3r|T_4)cTZw$jfXA+JNzj(R$a^m%gRG
zavg*@!mo<OU+K(^hE7d%^txz9RX@w<&!|~mFLEp6_GE+%?bRD+2G5!7c&qCzAQHvK
z1qG-;(YpXf1$C4t<k@ZMA5h+Cmx}6J*<;A@UAI8g<JK)Bn%%)T<O5$ky~M3*5rW`#
zfm#ni`kv=P)D)I210Rys4Q7!|ZjWZ&E~UZkmq*d|QAUZ9Ij5;DCPBt3uS?afwL2K@
zZKMnsatDgHw;SUOzwcZYozpt$@J$&x3+9O0$D>h31_93^icB10kRzY~f(hoN$Y1{e
zWk#w)QIMjxHwr6}Sm<)={Zxhd0CGh<Dk2r?p&^0zinII)<Rw%v2AmW#1wB-D`_;Zf
z{iv=7IH-uowrzZ7yk7@u)Qr4m6g-W$zUWaCJnlds(@_|%bDH+;_y(pN0mcOYjp0`q
z=Deo|np}>&(m5b;P=b(ma4En5Fb6cFi1H6Lo%Nro>^(E4$#-)qnLzlad86lm6MD<l
z8h@p<$GW_kL+$vcc_zM2{Yv%K{{X7BH+PYi-A}<Y#X<U$>rF$|I;@jVD+ob56Fg$E
znI!jJeJ+!4%6**OMyCET4YItJ3D#KLt0~ALzMB64>Hh%rR7|nx+O)jz5t!G@kpBQ7
zR@!8<+0K^`IY(2FMQY7~WkndWxHI%#Cler&G72!rAk%@`)x&>6e^SN$ta?_fHrW&a
zjK{j}JJpq0Fu;xot}aY&4w=z})Rm%M2+e!5ObkHkoH;oaU5<E+Qf=cRpXQoQ)7KP8
zMIwSZq1XZ46(GpsqGZVCl1OC6D-2-Mh6usxp+82XjErE?#qu*nZlPnbjIoTV?t<C0
z16nb(saE6~s=u`{wpaL?E<Q-5;1LGn#}(;YYU4DFcCQpXtn8+dA;c#+AkzC`hDph&
zVaVmGOynGKOn}C+VCOWbr1IS7sz{XQ<Y<K>Y5Sm&8yeA^3Trw-rKE7Gqa>R1O0mN7
zO2JimEPPiU{Zaiu4qr*q+?<><k2RwX31yW;u6Yy{vb*)S^&yV!BS~zIoy9lwOZ6t=
z!f5Pu8H+~543k{u_fL{!XPHY#dCBgnBU_tUvbj>HBD+87&{*rq&U(2yUxV+xHu?+e
zRGtS6j5Ebf_{DtX`Xl;|@H~2Eqc6npGHd92OPH=<Sz}<z!yt;|WQ4KHY>lvzlC(-G
z@wbCaWh0SZ29u%4b(Q}Bu$71Y)Bga)d{_F8u|KLWHk`M(b6;oc67AEb&sz`w02T3P
zt|m|a02N+fXZ*&xUY*RFG3vNQHJpTePs9)W)U1=uCjgLoH0ca;I^=Aljlvr<7-d!i
zitVOzfEQ|9h^{~w91%lmNLBZ#`1q;q)k!TW%XlWAa*HDDW*8JDz{nQF0!9vVU32v2
zENSdy`r^4Qssxk`z<_J5dM-tiMP)|$0ph&B^)i3Q;O+V~d%T9q&X<MMd~@9%btEdN
zIjXqlxQ%v89%+z8Bi)DEKqsp3a>knE_AsT%r4WAZB`fhGs!1Wbfst}qO-~V)-q1F}
z4mhD=)Fm!AM9s}&cKYbLEzo<-BsS!tlE6^9edN~?AKXU6o+`p2EW_L}?NB;3EtG-e
zA)*5Z>Z)xd(_DrsgS6=4ARN>>_0(3jj{C4c@j>hV0Jw=#TNc_!@)hVJ7xv1xBP5P<
zNT9fsm{YDwjU!f<P*w`0k>t_3UF`n=zxJe0h@-}zZD|93*+QS{qqB`w3u7<$R!UIk
zYT$P*+_X=q%)8f|b58xpnLVI*#RW7|%Eg#)IjOXGM2^hIfmO>M)nchdpkHCF+@B0+
z$Ta(6J?Btc6+QGIr^JI%WuSv|@-fvboS0Y147o_!BOWq@V-zlrWSVrW(8q;inl8d<
zu2_;V0i)+=fZ8#N3UW!Iq?BDy4dwBLOZ!`o)dOv*wCqff6cl{0tG4pP1H}%~#?t$K
z*aPnr$;PUQ82*A5?$>Ad)NlI50d->cr*y|^_!+AUcx@w6{BEYt6tj>MA%5y9CD0cP
ziqlPK6iAX`M~u|Xb~k%D`BdQwM}kiRs^zvTdGE)PPjnSY+=;r%&rq%8)1*-X269NN
zdvGGtj`<Gov{Wb^vfyXz)H;F<gea$Z6G}DERmo9Jp``QOypw`3+#0afWtk8NmvN~y
zn3w_>VyD!hD-KurQIwRWWN&P!4$0=8+Y+u1)d==bt-z6oIjb8D!Yr`ng}-otoSJC5
zl_I%2k?GN80a1<xNN4t-*|VWi=}i>2+sOs1*H+=i0T?v5%`m<=DPz<<`O3U_0QW@8
za}ZJ^A1amOg52She|0QyTNU>>$*UWOgiGV1nXL`OZkV6N9RxC4MhI_!@loGOLu`a@
zZ^caB`&xv44k_EU$QyK4YI=qV!e_-xq_nX@_YQCX%`m_V>|vkHPa+>=#1F)dsoRQF
zh}&y{u12Z1Y`+j_tB_%cN1TroK8XN{*>;dKMCxeUH!13swB<{oWZL1B{-A$q)E<ef
z<+Qu^1_rbj@&4j0xycn(62%McDmktA8c?q#Qj1K9BSy$!fl?;L!hyvk@-FNxI^Sp;
z*i*Z%LltO>@^KR4G0g^MlogeS6nv_ZWPzH6pvsWlRgiH@kTm*5or+5;ETz6U>X~o@
zPk}_~BNl{s1BwbYV|L|_^;y%oHj5fvGOtu;7b=O*-5F&n%tS$eDmkd{ZOTYka!p00
zS_oa8n3iuftK#5N<kKuJZSC>7uuTf^TnAQ(h)*J>YhcV6zaQO9XDFgPXY*94HTx~E
zqA6^xXHolysbJG1LzoT)G+MOG32rFy8%BUDbxS;rDzYj*e`I$&wlPm@c%Iyre6Xh%
zjwU6BbJavgIUY`H?1#!gWwv9I2UYK7<BSrtRJq@b(NL>T54JhSnvqr;D4aIpPgQH_
zeRAtWze|W*><8qzp|5QoHso<bM+B0}BaKTl8~`eukWEt_EvIPuBhy;trnzFXLf}b{
zkj+{n{PvU0bDz`CN^6=_BT<+-MB_Uvpjt>K8O9h_o7Cc1=TSRae2#Ip5Y5Qqriu8*
zaB4LeIckwgoSb5)FX%ebUBhcpXzjRxoC>K~hIXjI6kHbvqvn7=WsFh{*&WePvavg=
zx+Z1$cr;oe+qfDaVJ@74y;F$bU{r9(4-9cqupnd}D4-DBu6P_8mrK2p-S#=k*O;pj
z_f2F>=c7>*z6}g_D##dfRl5GOZKc^g)x?2SMsQ74J@eWt5H|oa1#wT+FHvfD*KY)c
zql6#hS75^|(%Oo$O=69(f7CBr-s`u_kUK518~_gnvF|?U$#uh0+DHs+p)x+|7iQhG
zjIL|ZV8a|y+Z1wg<#`>kVcz+0#{!!Oc6^=}Jery!s7UC2Qyx(vF{<qXv?7y5r4o?v
za=r~me;2<S7Ve_cTz=6|J}`NsXDX^d`3kp&h>T73vxYcdG#Ooo9&2xF`z5<Ua)6^8
zG5V>!W$6g*?blJ$WD`W=Jb12lT}sE3WR)+or;kA_xINr%-_e7L-dW_tYDphezx^cj
z?f(F!^|<Y4k&UTrGLzL*`WD?<{gQRt{femdq$_KzxrFWhs~$|z$(7=0(zFuCrCOWk
z==+;P)ok^92(K+BS65I@4^?KE&THe((SOu??@wwm+Fr=-XCVOR#eHR>>Q>sFjCXdD
z2S*tMS35Q*jzGsXC1sXycP|y1^_9>4y8*$i9!}iWN7f@Tk~pl>d>2J>bB#zDQbqD<
z{+20iXDmfQB-gG=?qNyk@EY)=9to~iy}hX;qohf<W$u$3dy3(50p_o@L%mVrQm6+s
zx;>i4#s2`)j`2%)CVkQM5bChQ9?W$7nw+;xp}#;KU5{p^xU^{P)^%Zy_@eIZ6|Hw3
zan(U*6psVNBVk8EnmTKPX&N|`F3o?Z?jyX@ksOA=#d_`*hE4~^#b-S!c4540a(D-d
z+YqzGf-}W-^x1vPX`CGR{k)A8kQH(UFlu4|Ja<iK3@&rW)lzz|)HZq@lRUF0yhp`z
z*{fTn$gzu)q)F;;P}*pAZ6xv!a~~DYHS_-ftF`Ohe^@VgTw7;=D6MZ)z1DR{y1Hg%
z{{TYgfGQBNdnH{yN6sl>sCcKja(1!$Oec?SS51`*1Tbzf5?35%tC=OcxW;jcx|u{_
zvN$vY$qY;ac6`>IF0M*$NnDw?Tj`LV2;<$i9D5^%(nfOz;hN9uHj^d8DN;c5;<YI(
zNrDbux?iJxJ)?S)(z2wGukOG&K5M-!(#45OBINMej5uYT#{>AEKHFMc+)lAW8#yN&
z3g}*${VUY()n>ZHah`uQwe%;ZwCgQG;>zMR{ldA}SXWZhZ2g!*FdTJ?#h+aIfN^OT
zax!{Xr)~W=>BNrQ86^khpQ^Fb<Q~$nJ@&P#cp@<rWd|a;kFGVyy*I4F3I6~KoEqh1
z-I7=2=t7{Y!LvJ9?p`xe$Mu@4(r=-^wT^pt430C9YQP2uip=dkOSquu6mG#ZVlFuy
zRb*uMN5bc3GsQGmtdyMhUipj>URA-)Gg5+aj}+)uc}$Ja=9Cvx%_(LWE8TgSnSdgQ
z34vpS;+A_;G+6_xjU?Lqjs+VWxm*saAaYi$Wd<9?Ja9QV6m-xd$GLl|<CJ};x)niA
z#S8(bIa~wglPa8cO=rDl>RT-W#iW(Nn2(C)jAG>J5wyD<>pxT3dUH>C=9NsOd{Z^^
zC$7Cod+JO1CZ0wK{*})aH>|x$cdu%4OK~Fv&&6}qX51DfOILG8qFC1Hc5?MPo&NwB
z^if^GCi9-Cb%P@&udZ#kfFtF+)KJ;)$lJi~x40Ort+a<Hs=Kv@F&c&(;-hPE7y(5W
z0QijdD5OBGI`V4^q@LPFM?;Z=Ur&CDexyOGS;wYo5`l1_09f~5FT6{?E8Q)gr+K96
z5#3tIQPH!(th${#%OBZ(9NQl~AAayxAmmrGgOOaX^i%aB*VJ|q*<MN(=>|DH*H#J`
z?j2VOaB+0boMf7_!=7n-6yCq8_GtzYhB_3-A5|`M;+VjL#YRLx@q#nvqrnHNB=glo
z{c+tRL#85}@M&FDhSsRYN&M7{&wbQPETGiw-r8;an9T}Xdu30?wA03t#$$1iYJ&RJ
zt!4W|QLZ=&ei+TGn(-WUO;drN_@_u)01@3PV+hK;VDn9p;)XKsAXNh-nr+0<+r(Z*
z0YJ$nxks<PU~euO!oor$96Jv+qfe)ndDKd>WLV>D6ZK2ijXmf6CryxubBP=t>VfqQ
z0BIrww%>J*T#j>DySUbC?0FoPH4B*%<OCp**XF%84J2_YM;k6@k@7cDnp7l$b3?9~
zadlw#YRiS>gWXATWob&tFj|VRf@O>i-v>B6)~ev9%+wM~XL6Iq%&ZN46Z$dwh3+JJ
zcA+8g6tS)+saeLiKG4C%AEievvn=T4895m9ST$Nm<%y#PX9j(pZ!$|7vYe?L8uR>l
z0=Wn2FX{qa?bC1M!><ZSuDu+J=m;Q#UUqPBmf0OhwAG@&ULa|6lm7s){{Zn{7JAju
zeMfmBjGuJZ+CHYrX%`F!`VapA74dhikR5vG3yjEXrRl4dWYxYAUl2EAdtn@O2bv}!
z9BsB$$sJTWOa@Dxhq2Wcene=<7cvhu+DAMTsupKf^VLB*u?GZ#Y0)~Xs<6u%l6Krm
z?eG5p6tW5ggfe{3LbpGsT83^vZ<F$@Y1omKV_Kin60&OQp)dz#xZblA^s=+3>9T7w
zqoLjgiPX5_kJD8ctFRcMwHaZ#yEfp3DM8%HJ)-{ri0&U%^A@)%J;YZ`)ZI<*s>K@&
z0Ybwdjz(8e#Z~BrNzc0MSMgUR?SLyLGJKlLC~SL6_$QA1H^oTXQ?=#2x&la2e9$(Q
z*6K@#7(O#Y>l#~Ml@Ur&L8{{zx69y-D=kdRIL1K+jf&ZQpj6_k>@?Xe;Q+qu3Mwn)
z3}NGN9aVGZh>pL=mzS`@J(pY`x`R%*g`_CV0P$5?>=*Z!C?kxjd1_fK_C}}}@B5~X
zHMKU&9@ePVN|B33BL^a??b~7z80_;>TL~7!VPe=?t%l*!QoBgWs+IJK3~HT|YIjd=
ztKT&Sp!bsL{{Z1$SXG780^$%{WQvl(N4K=|jxkK76<i9qTofLew7h}Y?-_tJ?MwHq
zsiB3>vz~KWS3(Ez6%XwF)3~k$H^1__3vIh%LXrC+q;DM1?=_DhSxbL)CG2YqTX0D|
zRV-Q|Yol<}l*!KA(1cUA-4ljdHLFgb#@G}?P5Wp7W6-Ffg5od&Hs$-LHM28smykSC
zcjS$r2`_J@eaRjpqxAcj-{T_Tsi|_SH?U#=e^lL0cr9((ZMh(HD8H8p2ZXr^yp}Xm
z$Q4~>e;l_)AVcH2hR#8$T00U`JyUdDG-=YwDfzP=38r|`PjlokX(~mX{q$DMxIm=%
zrb$1&9`6{Suhp+Fm7C%gjJbuQw)l?}$wrsiT$OfvPh@3n$F8eC>ZG`lia<(=u(DZ_
zU?e0dG*$JuavTb1H;#5mQg&8)emA$V#H@GXlG^g&?%pQILD4i)f?HtXpZhXS;yDz~
zYTJHDzFduaZ*{jy-_=EP3#wu=Nc>bfl#<60R3x{GT`le-j6B&tgHkbdWNoh`&;^1L
z?!PayLMEVRn5h_1=BK=suE(~Euz58cc1i8*Qa4j5rObez*)}3>K?KwVz?ckiO4i2G
zJhNu2{plo?w(tj<c*Q<}=F5BQ_$_VLMUUm4aZdME^4Lf?1W*>Nj~PpzNjz1PM-{!Y
zl^+l)wv>2Xg{1o_t|QpYJXIam&wTyW{l+FqB#h(%#bqT){{VK(*~V+3r}=hgny0|7
ztY+M!ijqR|Mp0K4EJ61Mam9M=_K;9wuHDTHk616Vgh(?@yKv+x5lxZYz;H3eQ%!Ru
zaw`&Xo@wEY&Dbp_Y&69SYqYT4^+H_yE;tm;A~__F-&qR`byJOu?9Jo4&s%2DTYZ4u
zNLcOrq>KcM40)>kLgb5*$bKtU8gs488{Afh2N+lrPk?oj;jTs$`(w>m<^qGBX(M15
zAQ%VnQcG(e2Q^frpXhpM27CF@{kvHAerkJ3?pg71z^PVPZCDHf4H~h*Du>9SDMc!x
zQ|$TTPvOrrmiZi$$BJylivXPFoTp;#Va<Jz`A9@>2v#SSG**+fcRp}xuNia25vR0|
zaIr$A*^U4e6t=qxLD!%6QV7v^Ip6{JU0?Kr^w4}YZCWmSL?Vg(DE&HBbk%i8^2g+;
z!n-60u%L|;Dz5+v<m+{`_QSJo4L<H)L^Ihy)~4J>0)c=4tPZ`UO{UzLpah{gHPP7S
zg}t<n>JpBi@myx@ofz<7W0onc_XG3tDO3<DJ#$N+Ou92f35=epkj?KPWjH=XEBh@(
zh(1}*G(tEh6e@z}A5|(YTO5Ny1(ueR0s=K09LBtIDv=*+44y?1{{T=5;xZ34EEHRB
z-GV!!A(wI5-E~#=<o^I6o@k`Q4Ce-l6ej>S0OFEUxiAM_X|c`$!&C9NZc)WRmD5MK
z<IQ3J08l+Hbqi;j1^)mjAM;%s+8ep3Wr<m~5uEv_$C6m(x-1fknx=fh)>n|s)=3iy
zGlID0gq+C}7@bPGoE+D3^_Qho$aO14**tvU4(p5S5M6%yS!PYYE<vwDrPQp;_Hpyz
zc;Dy`20{QoDw;gU9t!6)kgh=^f+`v0m?0yo*x8(xJEuZbJ|KP3P+zk;s~-5OXyy5Q
z`k~~wb(0{F1qUa$f#FG}&FIjsZf(T&(<774-!);RHT9*tTU{N<f6&#>ti3~hXyK!o
zfF3hi4Lj9B=0z6`@+XpOgRaG>lxkd~*m_2N2f1<hx8a(2?XA(8bwG?hs<T$mZS_gS
z@-`TER*;mPL#t&e{=*3d55$_`tdfMSK2KRJ7;<CVC1@klUV^mN86k}}s&Zcy(>*bH
zbLktDv5>P`9x{AUzK6E9he?mv3%HMw6=|+m#d~UFWgD@<tv>9UT%JE#;XKc5X)X<}
zpLq?<vdt5RMj7I%y<ZCc(VPzzh4k*Xd+EF3X?Pi}Sl|=lwwlG<SK3*c1`Le3qbF#^
zw``7Ee1FW?>YM55f@%Z%uWBKM#DoKk)pnaPTkbeHu1>Y59uAI{Je`C0s=9KBlm^E&
zQziSCJIKWeV{~54L!6vYljYlnAm*!$=kjgCg<G=PRC%ER8@_3zdr@tW;lAnGU^`T9
z$)m2V744bWjMFD9X~dh9T{ABw(^+4Mka^~`_CBjbjBbpY%4)dw*kh4Z{{XvTXjMO!
zFe?TvPE2Z|$vreO%afp9`l<$m{{V>YBZ}i*tMt~Z>J50_-CsBD!N@?}3MsEOLi<~l
z2E1<d7L;QleAdY7v#xg{@AQmmeoOxVvF&4Jr&)-xmIE0*&@w}8A>EEmE~PPrT5Fc$
z#%mR?dXSZ9?VAKu+HE#%OQ>`ED%5ImX(3KUhPv=W5kBAz^_t3=%gK+=2C8l^=AM7^
zNX%&aBzdG#0Kl(d>G}nyS0`-qUZLw5by4=$5ic~jcgelUAXiTPDAgmj)*1&pL#`B8
zJ+sZdzwrK5WujioYpOwXtA$n>#dI||z06K0Ck*z@Gq3uuN<_L_=mAM{{DoTjZ%~qS
z{k-QcqrbW$+h(nyq`3sLX0z7zw=mtZqm^KIuPm)PJByWO!s<qjGEX)0$Lsz2OQ^*c
zA*-x<gVgirwtKH3EQ7KIagS5TeSPW}mckYYrwqhZ`egU1K9N>;P_HDPq<*Wtv%6bK
zRphojXX?I!urXZQtg*OY!^L8KGw97bPtnDy#HM0CPmXg|dIZzxw|};{%Ro;A_^P!s
zgy(S|1Z5ahnX541X9pDM&eC($PLjtoVWT0G)N-NlIIN|_vB;rm$j3SHK?w@+ijo<N
zIL}mU6dJ9A-xRLd=Amq3Ak%lge-vz#5Tp^GH7ruL{{W#vp=LjdC0Jk%GAYokEJ?`v
zsP5wuF6120@i8)Bo+!}4M+YLHra#smqPNg&UFA8PkBZj^^)|0@tj{gY)P$aPsqtOw
z^%K(3>-HvC_?|J5`K}SD>91=%kX%QyML0RG$EL#<eoQ*a&#wkH-WZxmB)K>Pq4!LZ
z2-ug55Ij}2-J~VRZV2lX!3GdvM$z|OgpOz|dV2&{BWpKVrzN3BXm;fOD%ViCid%Sr
zU8M#A6eJ=+C}P~7G{7n6fUU+4T?%O%9EBqyuJsFKRQEy4RcW>uGN1UQaiD_7D>?6)
zW&B`AxEz{yv^SPk#WLe~8OC!;`3fZTzO{Lx>M3_?BOzAL2EM`lAN@eJ^){6pGfJZA
zKm-%rd|?|L`(=42#dS~7-3woe?R6_+@kEC>`>t-g6kK<7&6;V-_Mc#?d~-~}&P7pr
zOI}<1VFc~AbQoZK)`0WXar-y)H=I|7G0l6U&sF2kR0uJKI;OA~%}Rf&jEo%6V<DkI
zrpfv4q&;y^%bezw@P<c}jAEHQNi<~~1NB}!ns6lWd#ITxqPVyA9}GD0LEcXtye%w_
z0w~EO){^Q0JY`RQ4RH_E%~Mvhz5f8FENqZJ8^@Zv4i}b6C|TrCR?7bXUV5tIQI)K8
z`8LIzF$cQG3s!`dEDIkdi?xMqmw9dEj&oA9WtSukhofGrOQ(())J{HZvdJrEX6#5L
zcKmiIEu=EbxSz#X>R^kXVNdcns^^X3FBs&5iqKNRpm<(Sa!^9AVE02@9LP<xFihte
zr^g76CT4KX-VGwm+i<ZCJ2eP0H<4vmbu8bXHKK+L+HYkl#B^$!)<4{TlOQ@C0j(@@
z$o9z6Mg~SG5gFwd+I8aJK*a!V>*x>Ce_g?<S;u8}FcYZ2B==luTPZHB-58L*Dz{0R
zeIry!AdrSe`>uAgMzXFuHQ>(R>GXYL>UjtK2IYazz~a6_^$sM~Zj7oiyx`Z+c0Q|)
z`_a?KcOr>>yk|d(`O{pLS5}rrZPD*0vU+qJT$Auws%fOrR-2Y`!0jvbOFOirvvL0b
z?XM{_O(dWIxuRtdy{13bJR0kmKx|6OdIW<2bZghG99J>|bB?h~U^8-Gj1Ls+$d*<t
zqmxS@pj2g>3OFXWU#FJev$zvG0r{?Ygu`$#&JAgFjYf8wG*Upwu0N{co?H$K$<WYE
zLn>_Q_jWMPF}hbFk9eiFfpfIB>NiM@A4FGg<V6UvOAD9B74rEylZEtqxLl!Vxzp^@
zDEBvA(Au@6cYH-A(mstgElx0kHgE2xl@?gmOJnl-tnYMZxVAkBr0J#=0%?gJIjbG4
zl7Sm!P)OHF7z~*w>WkeZHs(e{kSd&wx5)9jEf6=dL2gLmv-(|?#5%l^O52c})Rvm0
zlOK|@jPa3IHd02!Hqll5v8KMqswQ+v6N8L+s(GV--GSdVWve<{+q}rzamlRphOZsV
zZjUVLd8Nl0K2XX$S~Pa*#yx;@=7hVnmO0ge{At>Ls*gXmP=@}fy;kfr1N|gXZ)*}U
zmC-B_FSH8}6hHmEOviW1y=``~MYTvL6;*8${_0P8jtSzHlO$r(<R4eo4Yk53lXg!+
zt>M0q*%`JR1<eUIg>N+LGx+3DlR`zSLWkl;DVoPgA>}01APqBHgHe&fCRq*$qOPNs
zb@+m$P#OlBx_r`=OqXrhHEDBs?IB{E{R){BH&h6na)!Y75XJK3Q?2gSXWGL6Q1EId
zWz4ZMFZECN;KpNInnwn^MDMVv#iC)^m4CVwk}H)WJUF1I(`CJ5B$AX-(9KV0r_Bfc
zjHIZh+ZykZD5d0@*1$puP!xSslI>B0UXt_L!b??pwf_LOv?7vQs+B1MG3T<%D={be
zLF9O%;%MeVhZ!C!y4K<2vMx`{ZfYGGW|~KkWE}IF3Jog^>Rbh0+h%kscoesCM;u3Y
z7!?Km>PI?;IK@MF#_rUz^-P;}Vr=7*p>N`SL?CgT^;h>(#|@~F_yksV+f0Qn#P}tL
zim=t=d)rTCn_&D_iN+2}nH%tjgtwG?!NK!QTa+^h$Qx<8bP+>rBYfZv=^3p<l>`Gz
z#nKC|*iFMjun2(N^e7+OOtSl=l^<0+FtC4c5sHTP^_cr>$rNIp(iJ-Bj{Rhj01cX0
zEfFHgVVrYD*qe7iANhIB52?IRf>erYe~BA+zyzqqWePqytC<u@Xd~=nnh!~mH~#=6
zqvKM&=l$l$<Np8`-A8Tz0D@%a;sY+rNX$TYR65cy!^SH`sj9IXdC9E)u_%dlvF5eu
z`I2a*x8xAc$pGEmcraiXk&1yMX;UnCrppY4HmeXTX4FW8ZZig0`4lr+pj<kNmcf5$
zFk2w^nwjUfl;E(an@^60qQghq9h(3##Q@CP<V^5Q6Jd7?69V|C=0K9M#}!OnN!Vo{
zB}+pXkfRLdtn}|@1lq%j3>A{=XakBS<~A#c#!X!|-wYIxW4v$tF-$(|xJ5iweWkqi
z?q+u^4~j801aIJnbyV@!<H%zq!kWT710l(%uAumBjf&ND<5QaCNEoa`#arpltaeN?
ze$5FfMO}q8XU~w0q%kzJfbKEHD@IftXNs3fwx0U&ZJ>#yb~qRn_Blx;e5oQBE$%0{
zSS`@Jj&J}K-M>jcO%lp9)iqf6N`6X=I;$VjkJDImxKmQp;}3A$s<7(1pjA*502Fj9
zkE_;yY)!Z{_wtbxNT{JvLZ}!3Q`vyxmCw-PqwMiqrgRb<I^)eK$)>Jp>-nic1HZFN
zxXMVNiool-UB;PkW{eXSd9Ie@CWO>2BD#U1bq6Aa1v3F=$uwcq^W;>;*O7`7TGOsH
zd$S~YF)`#+mduwChK%I-6bDYlY?cS3QX$<S-PovDe72s&NEj#tj;IBewzoLo^F$dN
zJBL+Fl7fDy+k1iLpbHY&!ShqWa55v!0~~~Wk0Pavh8a=vr~=K=wuT3aTq6KHiiX}+
zYyx`mQ~Vd?rivgP+HWK<&2!IP`a(Ozd#C_M10(@mG4`<Pgqk44sa%8JX|d#%Ic|t2
z1vt|_UeplUO!mYbz~Gt+a7ut5lihYt)PF@Rq3`Tt`Holt-EsT7Nm62xWlEk!dObF#
zIir3~HheD={Q@i{jy|dj=m<`^6=$c!#^<m_OR2^eDnQAtZYqiqm)LqE6xCT2<#V4H
zqh*cEyF#8#bsy57(}^`{<G<8Ut+Mg~9oBrgBM97!amC5fDg6-!ux$jh#^N6Yirw4V
zLvb46@YU0%^zD!RG+Dz6qN?CvS3cGB$+Wwc+72^bJ6{KyPFVOohKFC56ci--3DGsV
z^jm=ol47mym#$dfAa8Ulc^R%?B!=yFOT$xtZFL)u+sH!x4Ri5oB@QLo(aD1jujHLG
zqxzZj>zhVLWiO5mX7tTk;>z9aT7A(ThH8*Oe<AtGNc@^69Xy20&y!O&s!ex7<MixM
z{N-h$vfKtMgP!X<c8zVQW2~CEd)sR`@$M@B0KI4LuVcEr+>N+>isxfeb8o<Ap&!K3
zX_~sm>kGC)t#aEX!~$6RBDt=YYb3XdPIhLxo{|FEp!yZJ7RW9iC7JfdF>&nnodd*9
z>N4@dTC;b>BcG4QvT8+$7>IGZBdW2z&yx$C%Gr&&xQcd>7x@n9G%W@7R$~;qMtY&W
zMQbgtrexYx82GE7Lf(GgPzjtXRakMtIjQu`8pycfQ^v#7&~xxi)NQw+V+0wT5nGrJ
z_{a56WFS{rY4Im}P9xN1f82Bb08@GgPQTJ#*e)L*7W~)CzP<F-_owwqE$$Jv=i=Dz
zzS^B~wTS&e^o5V9H0O$BQ7|7C$2HK>=k{GLj5*-)u6sT<w~dUMDotp#a3f`AJ@~D(
zzKn}Un&#f_Bal7>ipID|?%FPjwkx}*)I%erb8=yn6|(*rR%P2J6fade%8Lw)gN$HR
zWF|5~2ITpxJ1bcpQ5p^#yw=G{MlHUIVw-f1&H6`u`^^bsX5|JstDSz?qL@t(=gnfh
z1#)zvBFD$hD|dB$9n5MY8=0%f=4s?^JssR>#wjw3QnyR{R)*;VD+~}tSLj}!)AW$W
z(iKAIB;vF>>Z}4hU=lf{mJlrCk$j3>&YBsd-LbZ@ka(kRG}!Mgh+sEkgHh?}o*u4k
zkPf)2nPr)43FXsne{FG}Y@P}6S`f<df~O$$OxE=H?_nwc4mveOqhCIsa@IGejQm*A
z&{=mZSmvVx3_MZ6Rz(e*gVjeQT%IZd$b%igj;d^eDgpwxbfJj;s&qn7#X-e;-SM7k
zGTVR{r7Ckp5SemFIp(K|Qmis~^Fzi@08`u?WS&hhrHExIoN-t$)GtWIs>Ii}vF({g
zNFHlr1lb`C7lbDVkZR0XC5}o_E1VRSGvz&BPPw+ae|93F4mqlsWOkEhOJ72Msd`E8
z^tmoH<WS&cfE=3OcUE#l<z-TihYSsRT`s0Mqkc|4Y%d!iCn>YF9%y!1Qe^pVQ{sp&
z8~kb4nt!((>JT_qTGmiaJV)A68;<c+_wY*^Z(c)HgjtyiXZZ2`*RD~aw<c4O;8d4E
zGt#H{4j1G+(<3h;0!B|lj%Qg0#Th58R`>MZ(9fyLmv=~30CR#nteLXM6n8S*c--A0
zzx2=a{(WBYZ?7XF5Oc`xzKPOw`)dm*?cj-3RAYfm(`+EHv`8(`DyYT<9Tq^#3iEoM
zM6*luc5va2G*#c)%V%&etqE2KBo8&Q^wzZ&rEC=ZWnLH>sVXJmhX=(}UBPjtU5nc?
zKnLWav&Ky_$wtnM-SJEWgH_rFuMVkf3a}CZ@)c%R<C?#+o`ft-H@bBp;7|zMZR(?L
z4NBM|qbf5&p_3yk&U&v5AMT`KO~4$AS`CP#9zLpv);^uI^#y}NG;8jt_%=E<f9E5*
zP%=60o-%Q1ph_xAne#rmq2Fnj@>xj<03I0ms@cnXVh%wS^e3)8J07)Q?GlB!QI%ol
zxJI#SJ(cTPUB*&Cagr<2=(US7r`gHRg~8v0A9*Y=+&{W<z;I|^Y@M<RGOT%{wYY84
zW3+DaO#!+g1{9Fl<2Ag@Hu~E#k8VyWrJmbtXPnc>X-FR8sT7a0CscM1`Jw`QCypY*
zL6&2YT8k<F0C8s++k%yscwIo104RLb_KCIyQ^_WxK}D&ZRxfSF4N~0M39gKh=Ledz
zEVE2c0-G%B6oFMa^GwnWfjra2146R4!Z3NP-m7i3hZ~UcYfp5l1GwNH2B`J0#uYIR
zMI(an7TH#|r6VmLN1;q?6)Pg)sqsxQR$?)?x{2o#GjCjyFbJyJ0pyM-wv|64QC{zj
zjt)VlM!1t{&VMvr=#D#ecVNdIQZy7*EM?fS;MJyvjF+*1b5#+^?pzip3M$URc-G||
z4<fK?Q*mh;^zA98c6(;IIxJqu%K_C$moqee+^}spq5TJT*S5i2aH5aY*saON2o>_J
zRy~>J87^%1x_q0lsw&pO&Cc=(QfomHy}hn>@C6BDFeh^gvz6XhvJmYaD<)~VQf2YG
z<ch;kG7!5n&(&R$@<bV;1wg7x61(yg3bu~y;78*Xmm*2pExYhX7wzVhsfaG$7_Vyt
z+gTYc(C$&3REnz@XE+~|O<Sd%3_u1HiSuufzrZz%CaI=>oDxXG52{N&Ceu%ey`|*U
zjg6sDLI4!<X}9ehe*$n#Pj#zN=w!Dr9X37JC5Yr2KTxtVsA72YLut&3DH-Cf7ZS!-
z2N>>zo4aDUwJr)f7(Kblsmj#XFp%4aik{p>9L}KgNvd7PZ3r$qse7FwNy~91tnmnQ
zFBqr9V3_Vk<Lap|F5mXsmLf(t#YO4oyO@@AD1iB+?8&Dg=~pI_NW$^}q1xGh^%#SX
z6`%D+p?zySX4@>8=B~84WV*6RXFGBz%M_y(I}slJ!pSGH@r}K8L>p^l!=V)fhS6I-
z;*ai`Dy-lva%cLa<UBX@Hu5>A7>9xODobeOjxeRU^HOBmTmJwd*P5i8QM#7ktZV);
z*`pXaaE9%EiS-+2dBmXP4-`p;IpIyn0Tnf*(Y!-&`F?5h1z(Wjna!m_rOg3fK<fbt
zJXf^A{{U~m!w%|2xLGIm+|e>gZD>>se5q4!fuyQ5Rq9rufsxdcPt$Pr*kOPw(&O8b
z+jj&|$k2;&Kpa!=wMAUfS#;t#L0+n8kuFH*`>L&4?YA9^oE%XaJGhH;wlh}B4Mxyn
zxVPD3brPu9qN(Ju&Bpg^W2!FKP<izRgWDtzH7)#7o<|iH>~D}R!A-T^a<7=_k)&`j
z_ZKXD(IR((Ev$1;#cnPe+>yBODYrW0HEfb`40itMILNB05M7KZBxbDcqFEQ(CnM^c
zZEO2<#@q^MwH0FD90MwtVhNrFI>O+ajjXv8`G9FQ=AUbB@yqTy;VL<C$kA4;8&#Xy
zxC9T0tp2wFx|AH?R+m<b-5Zc`R9DFU*#odER-M%}XMFZmMj1XExD_)v2nI7i63SSy
zJXfH&Cz5y-wZi!s1j!RJ5XTvyR@Kr@2_lTNb9EyE+zNc)Mi1Dcq~pLl4bm;5C}aZ!
zcS&_1CBlw{O{BIhTX61&eZJ{Y=CktV+a0%5QZ#o7x>)nLn!M8EnA>lVWRXG4+h1rL
zRh6B=j@RYYU6xJ9p%U^$75(p!_|(lD3CaPQh+V@m+Ke+!Sa(VZCly1#JQgZElQT{i
zZd3ZHZmwa8bGC{vZ4z^}f4YKbmgPY}q*7Xapzb()S~jP1sHK!JGdrAcE9g(spVNz7
z3ehibqggJPXOY!x?foC6X}XcVhS_%>xazKsHjZoQS@p7<X&*Yrr%G1HnF6RB06N7&
z?c=JNOq2MiU-Mk|$*piS*OYNyJy*8?G3KDcFBzvNCp68c{i#&s;8a8&#Pln{eA0#5
zKQ#3Rx-<&{ztiEoh@_|vMRUz}O}f!8%&>xCCz|Q)$2qF4YA9~)+9^Oe6dMY&3k0#;
zL?uS~c@&|@$Re^HsMhWMDXj?FK*;Bn&qB2NZSB6NVwYC3D=UoPc%b0xa_F&1H?@T1
z<EoM(zyf-vrd4*eE(imjDQG2x65wrqjT>&;IR}cT5`KB9Vz_*PQGmKe2QB7_hCp$V
zimrwJAeu1`xq`f6i31$22HNmYSDIul0UPR03uJXskr7K8JY^X{9dlf#^()bfAbaZ=
z@Z*+HE3G!t2%zpFSu&0R2ZNe?S!Ie_qAcL28fVMVJ6hfCk}MIy2BXxc_a+-xCAt;U
z{{U0H4=&@V>8a0U0H}XV{{T-Wy`69MSjwWDs-HF8hQ1QVxtytKWT&+PewTiq$*4@4
zmZupIgOyS{uJ5PXLvL=j_VKDBi~t2KrK(#(utI@jQGfuci+g-8b;i_cC7M?HIyi8w
zYxX_A#x|2xI@Xkz@G-#Mjw;ng{#uue;B{GO(R6YxZ$rAan6zrZgT^QeY4s>uYrx0r
zitRTxaZEmOf<LMc{*!CFh?o8<yqGcM{E>7u6Ng7MhFkggNLxSEL#e~7{@m?SxE0k_
z!r3@jlzwqQYWC4I3<3dxRmVq$9H@@<d1d7J=U9(cmQs%^fts@P)He|TCnWw&X*J7e
z*5v#^=7LlTRQRj2SG3YX<CAkuvq_>$w5a^oLDOZ7TBs+*a~(QOk_?<y>(jD_8(8sP
zaW@=?R<0$T#ox3t4?Zc@+M5X#Tw^qsR@?AG5&@vDU^xT~S5gs+jo9&wB&me!(Tl_q
z6&Mwrh?`5;-FN}3Owl`70^S8>wJ9Tvoy<6{W=+w{XjHc+1F?E~f85x|AUVjZLZGRv
zcj?vY&88?;&f3>8BxG@3k`HtfI5{J>Z3D`u1XX>~W4g4EWq7JhcJ4b!w@Fe`QC7gB
z<mnl&SLoLIm81LzyaxupfAv47+TQ)n&9Vdk05$YWUZvCqNX=k<d1n-n8TeTT{%Xux
zpKIorvcTu?XFbtuAodsCPnX&8SNc|&?Ol6|Aay+ot<&ye{{SVsE?dP|`X(Et){4&M
zp)K(SwSOjg6MMK~y*zQ!Ym{G&)^`am&OD0H*!qIq5IRHi`lEFXF5XCyhy<FB4H66%
zRUewlJ1cJQt}o}_LBOjhE~YT7ZP216(~=|`Km*MpLW2h!3J!_QhgdN@wOj}IfDKW=
z*gJexg`*tc^-4HfIs4|S^{pXqqW2U5BJvGcsseI)sW)I@eA2QeX7oKx?d}DI?*tqI
zYfm9pP!k6ux}n$frMXXjKa9)BBBS)yu2NswUU4Gu3G+dH1+OL1fm0>9CYkLM{Z)&q
zI^<yeR4%NlNEA{9ft3nCG~u+I_Gyfy$+-1Pxk19vAmBoBe9{)(&-_$RC_H(kQb%<Y
z2|QUpA>B^{IBfPRG(QgUN*r^v^G(2^ycNO02fF27s-A>0teU2ryyGsQS4@gb9QdL>
z62QpdGO)o0uEUyGrD&@rH8|5gL7v~+nULj3;L}va5E$jZHP=5?y$SnweOpe1n9nYt
zS0TMrT$lKjuS<hb9MH6J^5S^qOfyfBRbAL~;8aq3*Q(g$4;5Pt>j}0-3ulVyAE&;N
zlKND-xynX)+I-N@sE#PxCM-`Qq(S<3=vg%h7Vhl|#D_eN>$hoka9G<cP@q)<7y_N9
z*~MvYutWl&9w?<>v0iUcsFrD4J2-HxN{0iHkSbUlmKhY-$IAp@RJ%YKTFi@y@;6g1
z3OW@$((Vemp(d1WBvGIg@;%Ons0+(-Oe}s%E1+n4tQwt~%@YDh<b4{*+#=jWQI{%3
zRoP2(>HE<vWSe$8kE)7WSS20E>LeUfN#_QJ()9?n3q+b02P2=w5W@pCaY{T*HzO2;
z`J|;7QIY*ui2|H9l{xOC1m_2;jS(@!0Ay26b533<Nb)EIVarxC^%v4GYgUGMkS)w(
zAc5U%&)Baw$a7X=$t-f*g`5<XGvmKg!*OeWEu4{iYz_%EPXr}_2P7?h9s0xQD7C8x
zzOjrY(hQsq>*vLS=E~{qW>XxBO0XimHj`Bx@%@~f*q%05Dz~`OfJhxws~5flCQEq~
zQlos~QODgam-r-R$IW@ImS-eKaR7jZ!90w0T7-_;+QtJL?{@C8vFa>+&G7~<d&gCM
zqr|4wB;B~@qvDKLAq+8WZQf5bJjK1;=bAQ8g6@7n98iwF;%x)vR#-^U1jxo#!iucB
zSk+08`~l*u9bGdX2x0#Kngdse{?GiQWBB~ivJ{hz<<>0WjRs<t3xK6~9GWO3s|l32
zMo;2C>EOKa5a)a2oK-A^ebmv3bDSD&%ZV+=$CK4dH|@?4#mL~AL+uDzGlDv#Xo#g4
zNeFz5(Ut&-ffPO2-#7qZebm}(h^NaO;MEi8whxoC{+&$grbxj^AXVkP-Mzv&VVBQ2
zsvpy<=;yR}fB=z#D)L(dwPu(7E#kgU>WZ~<QdG~NXer@Tz#95E??gj5&1r0|vlh|F
zBf6wZOLtxSa!p(5*5Oku*il^U+i6#3%S=Ar-T=FqfF`7h1l~^mJXIa+7js>KZ#h(>
zEv)Xy2xsT+sFpWfmPOZOx}-AP2DgSnGvriyPM>#Xa9eo$HE5QP1)*a3U^}R6Zl#bI
z_8jNBspM)qQsA;H$`7`=SiUXYXFX$iJRx2<%X7sSt(%x+j!4wwJk^1PE}=m>2H<dL
zV@eH5T#>qVjO~cImd$5&D96QV^r>Y_9J$<aK;OWz#-cHW2dc5tQ`~?m@x?Y-sdOx#
zmQu-bR{Xda8KZS57W&ai;~eIjs93yrAd!_OtRaMZA$($$)M=p0C8PnSMykHt!yt7>
zg;hhb02I*_LoTd1s=ZrO7P25|0;x2wbe#<t?_t+AYdUg+6vG&X4{RO<J{?76w*)uG
z9ngRE3rmTNamou0#%d)ds)4np0dZw5?7tFK2b!?8vy#;Y$J~CZX;=uh#F(M%-r^@L
z;I}mHa%zWG$*y9aTnPpTnur);#6bYhnx0)s85`O>N%da+mT@eH`V_AGik)^P$+v>k
z%bvNQA-Ffz?(n|@sy^Z+k|P;hgT(=->CbC3g#}|iDdDlMdluRQ(c+OV9!r-b=YdbY
zl1Z;tG#OttZ6q-&Zc&p#M<4yzEyifPmh5P~+_HJ(Xe_V>a41bSQ#R#cfme4H%GSpR
zJXJI*u*_8P%^0N7T9`H6wj?AD38`X$2*nq1g9&mtpklaHAz1O6WZQ@nZP1bKj^i<o
zYFoSK3-Um5MyzoZ45R`@1A7+VlMW~@Sbs!N&lKY_fTE$lyAdYXIb*=ZNcL-U2oW~g
zMAQA0+*@-Bx8c}0OdCy0w10A3wt2-^+}){NtT`RjR+mh&6=l!+Q1{!OQp&i^Mz_nJ
z01`p%z5?1$kZ9*J-hYf_1I-zIJhxFBcwcK&P+ROm!{TTuK#JqtMj^A!TT#9RPTeBN
zK`a)aNaeWZsWo|oa5D}`tLe8rj_mS9P~1kZ8!K{Y@ONylBDQDa#N0W@-8xt!18Q|r
zO#qHffa0D}cH?$yqUGfQ5n7_fmP(8Fe&xm-U}lMkMwtwAP&`Q^@VFFW)$&g~7Z4G%
z;C?u$A0FXjMB4<8%Z|kbF=j%_Ii!1L?QjIz>=t{7*5d^6MyU59$RlYLRMt;3ZYP?v
zw6}8}(AWctGf`3DjT^cVV1%FWs17`eNY?a8woU-^Q=}%{3j>o;Lv=08g#vOs)oM-t
z#@lX?L4rBRjpXL0jx%U5A1h3^wA%jT$@2c_4SFl5Py2ZYG>WA5*ky4(g)%E43@8cX
zDBFiCM#`X^tsnuHYb|@H$>91c&pe8O#%Sw{c2Am*jSk8WRa;a#V8@E~1~XE!jAYbD
zCje9k(sRW*0A6WAXMx>#DN~N>1b8{GJB1z7Ng{-~lqe;RD7FNL+)k>8Cl${B08#yK
zZK7DnkvMyX7yurrpQwJci%Pb5A#l?$z`^RcHnnwksocEx5;GEW-D=Y4<(=E%73a>e
zDqWPGq15j+d-*P|-I%f96V+aRpZdD%LA_L!*{zu1sXbLR_NQsbHCsA7Px2MkmI$cr
zt`1tuE!(e?>MbWwv(;>t>efKOjFFzIa^w&@6~O-hPkmQ$qh72sIX2F4z@DqTX;(J7
zg`(WsNF-JnJoQ{nMxEl9=<8t4vM<>-K{X)AI0RHpfZvK&+y+NRvl=Ki+HIsO9MyzU
zki%zHNdxR*xOY|%PlJJiC=j*WBqN*=Q(DV{Rj36x`2$2L44ahmMKBf)J~>|LqCLYT
zb5Pq(e~XGJW3Y4hs6rVcS52;=hvuCEh)9i8D)_}pxF-gcfMd5G(V{|6bg##})4<~<
zg^d_UH}&^L9)2UL9EX7yypF111}TaaR~6=P!~xY14lHn2#YF9jbGQQ@Xm(=V@F-AG
z=3?D-Lf@fyj(Wuum?!Z|a-hYQAP>z36mq{&!T$hsay;=>4x3bGv0A@XMuu`xfj^4G
zYqxhc<spLIq;@=<c%*eDw##E_*jAw|vBHuez#Z13(psmw6&13>y3bj}*A{{~mjsSM
zq_bJ{+q7uc37X-`F`Qg{qtNNn$qlcwu0^Oa#y^SUsv7S2c6NhQmQy^@D~2N?hSbvO
zn3N|1vqE!kZHWvC%9&}Y>a7)%5aT%LRtElgXSgr!6O-0xtERhFVC0n)Erq`OjnCF|
zirWaRakh0aY2$`2M@s!Lyjyrm%^*+g*Frk>o}J7I{_B$Hu@IT(6=Y%C^J9#DYOnQ4
z<6BZZ{(T&BWO8iZ&FXz9HVmPE-m_k<)va#b1QG`r@M{^ZrdN9BzZA_nIRty05&hF-
z*Gn!gOCg|y5}Ib8NjS4->1z?|Tkm>P8XgA~a<{TZ8U<s6S<O-tE&4_Va&k>PGyAGI
z;#2Kxlbac)=!o_oxeJL;G)|QauXH0{iv!}VX1uo3?831bQPoH38<?;4R+Uf;eOF=`
z7UcwU<fPM)veRI=FrvuWz~?o2XJ`TBj2atGP+a8kR!X2Q0X1nsSLy{=5HcL+imgXx
zL69(N-&loS9AuiOMkRIu)dB><JSgU@?IR!pMFD3UgOOIz?%Ft@!ble*@m|0kc&4ju
zZ@NcvP6wI<5ttl`q4fruC0~9IjH}K?T}j7B70mwtR=s})gJ&h3lZh_eV<W{##m&@F
z9F$eFRq20KTHIWnF5=-u)O=$-*39z|$*+sOGpTCV9;Hn{PU|Z^NzHvX=wDVYp&HrU
zvWeAuC%V;_M?6usWrnU3meW@(Z@)YMC@DcMNUJd6lYyFnkpamVtcI`w89daff_tGg
zty0@hh|4ntQCKUV)JS5wmNXeaekPh2rIYbRd2_=TGjw#ac;__bAY!uKm-S_iw4y(7
zB%epkX~!5niV}>V>5Y#haz}JoFv}h)dwDVO@lY(BU<?{U$G(kA4F^uK&kIE=hmD2^
zHNd}EeF;64(bP2f%RwReDvqnT#X0N6I_lmFxJ+}Pc+X&pyAF9`m7=V;l+`om&(n`U
z$#Zg-x2Qy8kO;2VVP^%bOB`qvD8R)SmJwR6$doGh0;KlGbeZ$dEUk@)6mhCUWB8Gp
zAx7h$6va%R7^&1_ilr8@Q~7mLvtScUVikWi3(2`i%@lwuJunT&1anMWlbmr=p&g#+
zt^mYHjzWs{<+4c;idD!Wos+nbaf(SKCxUWl(j}C(7k56LzX6!FtDpF;ji=s2du)?L
z!IX~`f%PrcpJk-Yb*V_J8Y#w2b05=x)+=azO(vZ?;?19G5^3?mPo!6pQ9hE8eN;sA
z&3eM_3(HiV2M&%h2&Ir5(P&i0nnH3hNI~knI5eXVT;OJ%k`MPy&P8}1x`7moQV?^I
z)i@q`q->Asg%E41vS8rnv0Q`ohxGhjM*60g7+FBes2$f{$C^o_StBaI5Jm{AaORdN
zZith{rm3G9#ALH)j&;O}am^1Q_h<Pw3Hq+j`pM`KwY6<F90AL!isHj48+?0M9CKcy
zO{tD(+Bq3<JZ_lM*d;ON#cH&Oh_FW8fmzFF7GILLKbqC(_RehAdu>n6YfPyUT}IDt
z3bt$3_LDp#Xyj7N%M)x<#X8PwgCW}3T5EI?E0l2&1ZDhHUE&Gs-DeH%D9EeJraT@$
zx~#n$qanL)GeCwyW_k9h_^N%<TRibeh;T{cnmbpujwr(K`IF+H7MoPGG7`XK15~mD
zT!`93xCN8X)d?rYHps~P6)dq%?01islT3+O1IEX2G_4S+kcQ7RkP@STlT%6&E*U_|
znu<N?CfsqFrH+ly>D}zfe;O(NN1E(<tS=^<t{0(`T!YY2#4b`|a6#g_-1?Y`)-`1;
zq+s!1H2$ZjsZJ>N-in{Jtf;q<<H0IIj`LP2e-xlbryq3{#oRMO(M2C)>Z@!l=899l
zBoka6s!HX-+O(fWqBzy<hqe5^>Cp>_6qW?ylGVMNZ0+b~gk61JQxl^A4mzxAE!`^}
zTVe6t%`htJR8!YbwzP2@fEd)4_g2jqILm!dcbZ&xlZoVggmeWYcHhW;JeCh9?g%FZ
zXtawF<WLsYPYREjU^=P4>QRPkU_&?+UfX;T{(&#9VltSC!4)Q#sl<|dSsAS5)ve{%
zs5di|iykP~g<-rvqmkyX_R>n&WTN;qTBP7byUjMr>1B%m<l>@{Ma{Me!NmusL@niI
z3{6wDzCgC^pjl;^brTt5u$qHa^y_M-Jj%<o=eoF_+uT0lkCjOaG9cV=1z57m4LFkR
za76`&?QB18xHw#7)?ZEZ{MNeL+_bF7=Qz!4)^(YRD{<CpWVT4oNyw=T^NMxk4&&H7
zvm{YquS2@2O=i|f%q#qd6={0`ok}6VJ<wxL)2(ny10nNE2O4{ZNfO5utKw6Q)fMH;
zpZahIWB1~STLq8@R@y#=dTd(8b|@x-px7O&-avQCH-0MC8<^xya1Z8%x47LNS$V3-
z?(urD?I07$rrPHCL~*sqW&P4coG|9S43V|BxXY3Tb4|Fjir_0}s!7zxX3-}ZsFZ%d
zpA?C-jbSB_!73g&2AO!VHQ;bS8K!8KY7=vEe(5!6SsN-%HygepxW{!1t!3@RaG#Ho
zDw5vXV>To?QCbT){mXBc)dx5*$=3#~$2wa?Z)>C_#G7G6j5gusr}hlck8$c0-0m_s
zrq0Dq1Cc`Sf^;B@*x^a9TbUYT?hl1SX4C%Yw4-!>Dto)w)a*pTG}~W9ZHsd~tVfIj
zX(NqiOrZo&EojrXxhcocsV$OAPvTAwie{2s@<_wzTcvau=7d{zxjzBdbp1;Gl;M_!
zTe%>JzIN`bV;M9B@Z}cl5L<hDhTuA?IW44VSdOU8J_#gdoXT4x%{E(s48$=9BNW)~
z4v>>q$(Ym>i;xe4LS4(ThTPQJyt01$fTwP1HAQaLx^YyKWowcvS7~MW0tHygs*v&B
z4y)ltF+xi`-tdb#290lg4(PqIjh@>%%}2DR?rL2&2_UvC?T|$adoIOh&1PK5q%&8^
z6n^2H?L2v@WRc*FfIQWGozt#M59DgaHZp@D6?EgbvPGxC>nWs?GH2jX0!SiKW5Hwf
zOKc&LSx;1k2;(^baa75s#*f8-OC|i50fMr`D(cOnmND_<cbbCQ<z$oFVeu5}tBZSI
zkYJ*ge=ZUW{hhVhl|fa_PPvf-V-59QbSd3FKZwtY_q`qgjadL)t|$wo5xFWpCXBdX
z&x~<V5UdPuO8E-lug2k#)i-IPCWWHwm8j_|hfHn&98*}}WYW3XKQsmP$c+gs1}L{d
zO2=JXssc%_ckAz1MQ0mY+qh|4jyzDlz4e#vqI+vM4I$u=dah_4@1^&pV#J<Fu8xO7
z^F8zETaN~76b>N%#oEi9~BLUYYg`=i;|KbMnQO<5yspzu4a<-vthXaqmqcQz>S
zk;;>AmU;sKg}vdI@k8HPtg)uj8IMA|+1r9Ury(lJ#FLJyHDD>PEoQWrWQtVQyY%bU
zGw63~1<RJUW5_3}slSQm8{aLv%~jn(!a*1x?^&|pQdG+!Hy3F7YfQYh)h*WI(Zj0@
z@Mw}4e=6a=fc0eB1<G5_#h_z@&sE!OFCf%ylIGe#$iqF?15>AXU+C=M&a$uBC-NgA
zq>exi4;2w?KOEEWat{@h&{#t-IKjqf_y!I^$f|TBfkxZP%HU^;fMv23VSqkV_{$@5
zXl2I^t<4fP$erPZH)4bXk&t<*Vr-0cRnlF4-PK)5&(&B$;n#2&sDeq*`3#C|as_|I
zD3d&5qhYs_I5h}~*~9X@3LbliqWH9dSk-p5Ea_<f0Jr{l;<@eawSNu4Yo&4ls&Yz6
zd{vg?f^U{|tGlhRw1{_Y8LWlZs-=1Tqy8|aqSCe4u4CG=fJo#qs@-ExdE{wg1O2NM
zwNaXJpT(Nvr*M0hDAF&g53{uwEshOkwa-)PjVX-CRZohF-!TBB^Fr$od$^Q;r<%(h
zM4YzC@O5-paH-s)O<>noa)*$er^&5Njh(w=+*BtWkySpODRXGJ0As~nBn+f&Cl!NN
zmAP%`)2MN8ZBY&8lo^ZLuRn?fZC8HFg~1<ndlamU7pju#2(KYH0c0L)JSNP^&?cS&
zs6qbJY>3CP#Z|yw3y*F1#aqWHbs&+&JY~rx-I={#Y*Ic`AdYGc8U-p?c|3k9+gGz`
z7jpTgX^}^7%Z>$YmRQOvHgES6$4j!((2IBg&nBhZ*B{8^=u{R;E~PBF!RS#Fy4##_
zkzDM&w)_~S`4L<sN}xE-C_Af_*i>T!s*3XIrjrqZO?oRCU*{tLKBy?aBG^5wZt|HJ
z9ko}k+z2K27Yaw3(OpdxR^?Oyfms`?Ta8-t+fEZan%|>@vQOjDMrlSdxfK@|cN&er
zjy#{UT8~R3X_GIu*>?U$rA-#wPqiC=mN_)tA$MOYf=DL44lGIsD=tShUJj9^%_Byy
zvpK6bxpf0EnioN|2^x*W@zq*gAOPeFxtY9_+O7P1O6TW}s=C*b3GCCgL@jla2jdjQ
zIAA`g6UlFCetSWy1t(wwil(!d{GHX9b4u7~e4YOQ`)QIeRRkWZ&fqEBKm)qxf2dxx
zhSDi4Y-I*x!C-o)#fn*Fwji9;6|le5FIT3YZ1%Q9Jj7s-daf~hVA{ORb8zL|j!86P
zNpXF0WRaQ4?3y|&Z*RySiuC$T6mZA(akJ_kSNbxN+BJy{0NPj0TId?ouw5m}N<@fx
z8Saa}w<a<$Gx@Bg!+z51JPe*{+}M*;%(6|ZU7t;QKh-w+mY$y=vty8a)y>AC9>zHZ
zvDJL#qw7~TaEG^I2;^e6nrEwp?U%Y_+UPz*HRm;2SB>0yI@&#A%^H_z57+wL-Tt8V
z&$={IfIjOZ8mN(#hVRWE>ODd4Z+*mE{zVTMj!4HlBpv3ux>#+-xTAruo1R9~u&pg-
zw4E+tW*9yVcHd5FM(<07oxWj4Yl^!{h`nR+TfatJ%xp$YW%U>*6wRKEk*T<yAtmDi
zo-)nf6<c8`kpRy$XgfwTTw~Fbp8g}MnkH_DsAFu8k97Q%=RDCuu$d8AFnKitBLla^
zdMPq-Qx#$|dhtc81<=UG2;!t5Cq7LxIR5}@Yh{LWiUASWp6QM9BWUWpbG!k_sfHDK
z9GX24@Cp|unn4`+q}&KAQJD&SPc;ZiTmnh|0BWk&^{YJ+!b|JfRS`x&C%SK`HOmbL
zPLlHWMNp$46Wx5P`mgKz?^Eh7k;=(vrwX9(YW!GbmC`B8mN?b2f7OrFd*4%E$pjL#
zc4}~{4|UINu4Ia3kr;*tAc_^(lTU?mYqgIIVI8RC&nuj*pG^M%^#1_*u+cPHHm^6p
z;Zc~^(zf>zTtu?Q!O@062ERHqx0VT#SeUD+$RfU%{S5t1B~wY&<>X)^v8)=ME04-O
z8t`V_!#;yy#}v>oE7sDb#6ja=%EKUvbKneC7L93$P6t)uaz8ZEaY>PyQOAY{k2K)J
zqfs#Ditst0N+ie!1XEj%M^){^IHWDkDiI8x80Uqel>?&;5Nm>duzCgTWqnIdh?p4V
zl}A<Ixg1nC){tIXJQJXJ*kFTK>GcxBD@A#6EQ*=&dlIeyV;&Dxu+e1`Su8VUf&L_T
ztFP5hK(>#lJ@uqa_Re#FI;t%t7+Rt09xK%8^T#xIMC4?~;OUZGA%v7X{Th=@B56Qo
z+5qOJxwdH|4w+`5Rf>W(>|t70A_ZO`*akj1;M5Oea`q1@rs(_{oh5l~%HU({_^QjB
zySSl}-Wg8JXBpzBjtFM8NF}#_^1kL{kTcZ=^RaMRU8HqW%OXi3H%#S+njRT#)mX_H
zDo+hnFa{GFSTWg|Gs{&(!Lx|lC8)u0Wz0};nDaoZEK-niTQsc%7UCA)mK@X~IZw+N
z8OCVcLGVGr!R~_hS%%S^W~gJMTj_hmw3anu4<frhnIspssL`Nc7n=Fv#oWj;*g3B8
z`fgNc;JaszO?WTroit*WM0&qP&g7g)CTNm&8$iLMEHLahV2>2ri#Tp&c1QEwS5J7_
ze4fz+{%gmtn&Y$5xTeh&<b|VHVO`97#X~Gs&_)E1falIAZ9e3Zk|!k9t<|a2p$dL1
zJl1z`b<oPZkXT0!{lbX5k8=ST#RDCk#k9dg?jMS<eep%ThcuGq_BT&}`(?x+G0zpA
zp1>xp??lHed7@*~CNl+OV6<=vAlwJam0SbL<Qv!`U`Xboxs3baw&!n}D(Y`)He$Si
z-BDWWanB^Ms|Ij-6w!>Ntp*9cO%<9K9Cb_<jF;QDaC}fY4fLjVia~|(MQS(c6~F+{
zd!pzY9neOjGTO(x6<sCj&jD!0a4K137jQD3awt7M*{z9<hF3IVm2wo6QAMR{m$Pc}
zTP#ef4|R90>NDxuf4`DX)mlYq0zbO3DzWp5gHW?e<@TKkT6dEark>sc<tBU6_bq#3
zvpSV5Dr=iHb_~tcGCOl+ZW1y80;o0JM^N{YD^)BB<j{;a&*Q*3eKP7<CPB6XoK&`H
z?k@Y2%?qZ==>oj#&N-*ae2%4H8<^6#yP>>LO~u8k5gM}O^-!HV_a8ig^;O49nrXe~
zd^dqscK-mmwoR;e0a8l2Oca+1k1_iSGI9k%@<ls)V2qke`@PUH2u=o0Du+dy=Hl_>
z#?(`eXsmLQawHO6nneP|K`b`Y=2372h;pP=HnlN@6iWFAz^e;(Wz&`8$QY>L;*%Ch
zOOUAIRfLdw@mcLtQH^dIHy8t|I?DP<<Y<Dh0P<)pZpu02T#Ov|R${HyA~@`leU8%Z
zgs9wnMGz7>+je-Up|udg6-EX*q>3_+jo4aew)g{kosV|*q*X(er`$mq!;BN%J^uh~
zv1FcqG#t>p!*nj#{7_xkIOvLFA&@Epd8x0R&A0+G8y(RQnC)%;V=d&+2K8sT_V7ae
z)2@0%w)jAn*6%z20GF_7_qOP&Msh07TX@nf==E0Y453-FDb(A13m(a|uwDWX;0g-n
z6-$#NoYC@26{rOW0l39Mc&g|a^m(R)ns!O|WiF|1H+zEzHC|*;2N<iX_e778;<Gcm
zO6(Wzx)@WE<f8c}p5Zruo_nuF3XI4{KXe=sIFobqD5)n#0Tk8kR|sT_i)k855;Of!
zBH!O6KSGMrSQ}!C&MCjZKRB$(wA~B@u|OMP9EvV0cpWnBz@Y8bWSNOHTd?-ZDE|O@
zvQ6|#OQLjGRX@aOZPD4YG5AoH=+OMLpT$n5;^|7K#8q;S(;h8(4UMdo_9hE`P`a*{
z`}6<;xTx*)w}wx5d1tFzS&EIaj1g7JTOGyNc3L}&0OLL=cO^59sqnc3f_SM+X#H2d
ze4YcKN5N2frpN~r`<Tj{coaD}<kEykbApY?p=8WwJ*^ZD;m<V_sT?rMngK;s_1KLG
zBw%^2Y3pBApb}bIOPL5cAl8@lLLE*WCFRpWBOfeyt`xrUm87^QetFGq&}kNFT)IVi
zjQ-qeJ(1jdvq;Y5voYkFdPeDPHXd_KknUnLkE2dkmNXgY^<J70Nm0pe-Le~*H^j<U
zkU2G(xli(Ac1Sg+l}QAjHA8fOPQ-t;B)|{D5T>4~&d@R`BMFjR{Jc|Ln;<U-?vw(n
zHtmG`X>Z|&+Z#WXFsmz#$C{X`<dSp6D^^P><VP7KAMUmP0H^-2mqxi;Ylbas#~k>e
zEv@2~-GKm~s-eG##W4X0=BUkzGmSFrc^qA%>771!FRfPU-GieH@lqDpyz*;_ew%u}
z^Gmv0DT_&N#~;;p$mC0#Rh}#rVVd)rj1LT}!P&u?WkaESp17l;J69u|)qT3`ZO_#d
z8*#$*S<9GIQ(?KQC>=`f2C48+<+&9+Qw12#1xlb<YRr>h5ICu!n8d+bssh^#yMe_S
z2oJ%ctPYC^nOtOdPT=sug@l;%o~WcD*F97sB9;)th2p8TZ6{Bi2$@i3v_e3uf;p^@
ztE_byZUpwFijM-8c9#P~m&ugXeLox%7qeyq@lMhb`aQ`uSa{7(b*$d$(eAvla*Nwy
zs3-kO(Na?zC{`89&kXsN-G@gUvE%1)b*Nc8A96M4$g1M|a+0n*5NZowUug45xLhFo
zQL^fKPNut(c=sQpSQZ?({{T3%WOBj&ESE@@8(i#c2CK_WMF|U#YcOp5!#E@Rr4igl
z!wr*FPa2!MEqNTdk+_f8o3_C<>ESl<?+=k#TU+VlJ7ouovpR@q33tMPdo@dgTu=m`
zP`0^Zs_X8r=bD<*;2zEmaBo%mj_$|Q(#c~v*ckDewbB0oR19D(>?MfuE3o|wKen<%
zn=t<Xs$o-bmlSr%<t2Vi#aG>GmKLaEnspMl@m%v<r&j$|&ATP6BrzBV>bc$LsWls&
zOkT-577dUy#aX5F4AXa$gjbu^F=FBzpHpZTpmWx0OZk1*I46@_<Io<ii&eCa6UEAm
zAI)?-OIzEJHbzcs2479jk0#1BM_Y<<-k9iBA{($XO4ky|Hb)y1{Z@xg^vu(gY4QN^
zTJ1m6cCg2vY!XL!tMPhFr0*l5jGmzw{wmHxd24j%+S);G=!-je^zs%%@*|qtdW%ht
z-g3yiR$Equ9d{9&S63DoVq0R2Q_U=n*;B1vA&l%G*S!%8mpX_nIL~wi#k5ks6rTF6
z-=MU_iXU@tCbXnh&26Qzg#km71w(yxwHUu*jFbFP_t32W0Lu3Cf)BQ*nh)qp@<ID~
z=SH|}<G`T95X2r0Yn2gT3IN7Bsy$auaDDhk_9`?8%j}BC9pHghMfN7<ASmpbs<)Iz
zyO`|*vTK}vr+%R%DEe-g`<#q314kY?WtP~DIjAknw*5f$#4&HvblG=H(=a_(F@_1Q
zFFnbLkjQh%s(lWBa`?+T7Lm?*t$Z<n1gb#CRq3>PSm1~3<7zX!uk<CfOp+Mm82dj|
z;DpRcV%vZ7QreTf$>oolfn{intDGL|ahQ)#EJkD-N8Mw6LZ}~llig~iyE6i=I;_sR
z10xaO^GhyK2I$vJKuqLyQ(Q%J1g|VIG9!PaRc447O_e-mt@XQxiUbNWYYfseNw{SA
z`3bVT9@>ZgRcUV=cPd=qW3ySiADVA*@;)f|B$ZUINMAJ*Gi8s;&4!1g$*D*{kozm<
zw)#$;`v?LC&(&r>Os-j-pmqwLGhGB*@OF>)t~R$M-1feYLFu@lv8gV}H0E$|_g8VK
zWCuL?s@p44haenzt28RS4(p!L)=YAoekoX%Yy+BOVY;Yioo&}~a54F)m4UPy7{Jaj
zY3nimCAh4`_o-os5T|o~Du(;i7xHJ@$^+GD#iq$Vk%vCN1|P|t3hLtCDDq}df4Zuh
z)OHd4Oqn9N^p_V`41LL8Kizsbjxmx?=C#MuW!#y<)PGU!;~Y(?^>wgf5i?OgtT`Y=
zKNXZ)d~!udBHS_sTD?9eCV$kLsXi#uN9wbk#t~Desi2Wr*>J#cKQ)vQ;~<LnxAFR^
zFHcC3{-o7K^GxSotCISgTf32pKkXOzs@Ea9v$%<wWr@^}v0qaJM?j3KN1TeP*8MA_
z>anu43LKB*1}j!5C`OVuVfAS0eo{VT_Ic`ywS(j`D}CzE)BC+O+FNBaL_WM#1QrG{
z{zStL!K>1<p&2PFL>9LE00mrGm$Y@7Hel*<F;PZ<9OD2{FtPqv-Bvi-q0^)2578gg
z*zVFjA5wj?jun{qUAuC~fs}#|Ad2`NJFBZ5QEs4OjHe)q`g8PG^&Smw&OJ8zV7Dj$
z89mn%SEhL0==5}OX5Yi?>r8XSAs#8?1ZVD(2D$9kn9SmuNzO${=c;QAe^jjuk1GRG
zC=F2ogOSxp4hIww7`U$v4k_Ff?wRI^5H6`<2BB#swDE^W801$C^{1vM&@UU>Le|W3
ztOr%!MpuJC>sn>Lsb==`pkTv;j;mgiRPajt6z0ORBj-4zn%UMyB*&U<)yz^%(kx))
z^;!>H==11vm|Y2gpNYj}!>&)q1Ma<69FfYmMC3{?E|}|wkfS>M5xBshB>*AG#~sy_
zQIen&mGMDp5uCD<fsX3rDU-fJvK)+pY8#cr01x0iQ||@DlRnihh<g<jM>!5K4~ngT
zhCuN`B#X^Q8}dEG`5!ec)BJ)jBAcAD1I{TJNOdTrLAa7~D!Lr2VUGmW)~hf;yN2V6
zrX^ZU#NZmDV??*Mi9OURatRgLeGpXW<^bDCTH*HBW+8~%IIe^GZLH0Abe81f2b%If
z)OuW*Fk0yzhtcywGp{E@j?zn1B+O1~BW)eVeb1IW)vmJ`OGJ@QPC2T(S@$qrtLMqO
zw0jO^8xPVfo8IpWL~3^v+OSK1nfj+gaF8Z5@~Lg&5(b%%%kxx9R_%rh%Lan(;3#K{
z<yH_h_c3phJ_m{pDIM+@GW<v4jcepkAzW_qDye>veFH^$B)BmcIS1;ORcWBMY2Z`1
zcoa@;;7@h7=8`*<xfsAGY9~&?xmO4WOSu04salaKIU@ub8q?FZ_A^U#gDl+D)wP<!
zNr^xN=92n-v6YbH>YLqZwH^}!<?scj$C(Qe%}af3xib4<h^TGu65RaH2hCf+W1bR5
z2rEL`cEpvY1E$8NB5{CfLl^l0W5p3_(Zq!gD!*5nMUXOo6Hz_PjxNbBuT}xhFe+OM
z=;e=cVCU+owQVwc>p&ux411z<dnVKE1W%T1cSXWGZuu;3J7^|(M6whNF`v3uw}v?P
zU}m$cYp5N3u#~JF<L<Rj`*nzJY@BmOaM@JFZqZ$DaE~dDMflMMD`>%JfKyTJw=KCH
zX_MMbDOQY-0p^1ZrVlIKHQM`GE9Q>4_Zr(^KP*?Im0*jY2owxDr@J9WO`v(Gm8(cR
z&!M(WAm0n+kCJGZE#iqz&<1;{;}Edkn8~OwBv?~%VNuDjC2S7%-DR0W@-xL+>@ivd
z#~7h4WP*78#ypOwyL+f*Qy7qBc&ThT7Zi%?d4;sa3J;O)r@9f!x${rGw+=%Qj2d;y
z#}%&ddK7Ioisf`yEX-U&+<s`eE!ATO`Br8b?ryafz$&VEtE;Q!w)Ufu!7bvimM6I+
z3#19CLvbqYj8QgG+}m4VjmH%&#l|B401|RVdctz4nT9{Q4o_`RQg4tegFM;X3W{4R
zSma}q)fI7b6pYIc$o)`)3F9X%fN7I%HPJd5e%{zIq?!C?fwqe>86_tJ#Sqqq+{no%
zx{;Jbm?Ex=Nm46rfSN>>;3F>p@kHuF*@;gfXi`7!6@bnST3;u-W(gVMqnF7H*3_As
z*BHI3i2F4{sUjY^tqY+ck-8mKJiiuU<X1-wsZm2p$iQ||F+;7htibjO=B5&oF`jBw
z3;-w1T%_VMSJ`iC6j0i3Q_Tr@-}dZRvs2qP>uscQQ8a6Vxc>k`vd4*A9rCEGfIYqm
z8Qf~^C5XZNFmYAT0@5%euBiCtjyOnl85H?tegO#c$rY@M(w}f)MIJEehzE2U1-i(O
z6kIZ`lq5Geswc^6Nit|+nib1(Y76&oY_Z_xq5lA_M<Lj%bDU<ZBZVi37CFr;i-Wl#
zZqCgzFl?!*E#=xd?tq;oo<J62n4`Uou>u<{j(;`u586Bp_+rZ-kdcZCbR(n1N?45K
zVADz6*P33#(E_m?3Xov%Yt0_fkx{EQc@;V*XQzcFjiQl5A>e~tbM-^e^4k~GblA&8
zd1V8t?GYcBJyKpp9I&!7hejA6)!1`O47Wr{#Ys~>OLZb68C)>??wljt7#p*S>A$HS
ziT$%ap{K``JhFk+b1S3mkcc+Ed9P2V)W<Y!9Nd_k853h+Q&d`<Qb#W0F;~z;WpY^K
z6<x0ZfS}~Bn!060BP$2o!1L8gi<r*fRQSz1b`@E<KUDXaIr$}00ytQEe;`Q|<%@Cn
zoB_`?i6%G;#Vh4`CxcOtJ4Hb(ti66|F0_d+ZxY_!eaB;tD2*AK<<`>90%dM8E3o<x
z(opG8EzQb@xNdTMRy|g(IHUVCV8HS+r_vv%7M6Nu`?L*k>z+qdBobX{I)JsGCfJ+<
z^<5DE0PO;+wXHG>`(}<s8?nK!Jv`E!u18`R)D<9jQ6_{+gfbDq6wolhFBO`zySvb@
ze`#`At%=~D6{%ltScK8AAdG@4w;JSh&pVC#N$1TXf9hqrsLC(}@kp$DG^{BG#VJ+<
z9zLqd2i!6aDw#4yM^t1E%AnLlWy;E@5wNK@81OtEs0iPB=QuP}QVpS7nri@TcyY<*
zoTD5bs6ttJ1A#<D;n$Nugj$B5YkLF3JSxiTgT;K?`oa2fbE93pqhk|2tDKw;tLdaT
zZuqFRdn;{1(e17ySB=I9BCN%g<CURuK~6N!g?7suagb=MTkF_JG0!67^IbdjPxSg-
z7X9w6V=Zd!o(ENclFVmlUaPf>W@y?uGmP;{i!C$Nn!Tn6xLrrttH?i4uM`6$V2aJ#
zSTT?I@M*_SWCx6cNt4pF**_?j<Mj;i>PEGFe)A#Q5#3d~m#ei)h}tDP6#Z3ZonSWs
z*)?W#wT*)~$f1kT^tj!q4?k1XO;+O_aFR_UNZBk-NvIy#k0DKLCDRl$ZC(#lH|z*I
zO5-)q_UnTSlYFxyBo3phWLEM?AVpGGo&c>??w`6uiZsiU&=u7`Nxw`LrH@qAV)%?Q
zsTG?(NaI%`+<4`Y(kTA`OaA~*b!35cnFB@_m0!C0C(yUHcQCEA%Bgak9QmSPwSqf!
zf+*D^QGfudZBtE>(oub6NOd_lt_EF0$t&=6@!(L69WD!WRO+83p4@oxS^oe@`ieX2
zaSY5Bbr>M|t1J2T#@y1`D5LcoB#M6P9XbM8Pc?U|+?HZ9RSOxBxT^bOt`SKVnxj)g
z{+2ygK++-ovM52uD>HF#BvQvNI0w4dX&)$*lb$Nr^hPGDaM9YaJQ6CWM!blF8xS0y
z`J=r~ANFoR=vGfabh)1e{z`bE<dtUT46zh^PzF*6&T6j4U970%{p&+)tF#f-OA*G7
zY5KdR>agfi%cKLy`R(q$Xn5VGf7}M&pOHQ0zT>-sIihIYLn{mrYl42Tex5bCms8TA
z{{R`wI)PjC8RL#yW#q}K$0;jE8nF)@*!KDQHMY~2b-5BT1dlb0x0l$MDNa=Ln$g<c
zG|`NlEqXXU4qIH7u#*!It+{^cIW|oH03)=Dg5F>y!t`s`OcGp<YQh5FvLkzI!+naR
zzl;TJ9FH|&a{lC{$7|hJNhi4Mn8`GZgZ(=Uac-VP`1q-Bj8blu@w9X*eJU9zAGbxw
z^G7C;8_sRz<kl(MW(iG4BW$VCqB5~@!uYEJ068FnJl2a!&~2f%kUNZlkPT4ZJW$9|
zBhGrDl@#TZleAc9H&1PO7q&?Gc&^>0?UKqDcFAQ1zJ0RPBDT6$mm5&!tFHQYUAMWi
zic5JzARm2KD_)hRcfBROuFc$gLI60St|drRyD$Upsc!v6Z*6W%FfER&C4H>j>Jl*A
zQa`H6hfO@Mj*Yslb}cgbK20vG>R91p9l6}!b)37@Z*JI^k%0OJhKeO)-!x2dZCnvu
zd>R;I{hm)->sd1JogyfrcjS!zXoZc4`2&h65q4mGRFTAS$-p(fIOySJ)VJgnZTQVg
z6ddtH`y>!5Rf}ojt=Nuq^Vlefj(&wKaq~wcWF~NN{Zsv*MnJ_KsT8?BzziguQkb!T
zMOk}5$Bnpey7EMa;>Z2!N-2f&<gYM*nFE7N*hh>}<~GRy@lmv7oSziF+M?`Pc63)t
zpyV=dQ{dJo)!vm?Subr94UClnx*I0d&NonUs{lg`sj4Lh7ijNjweB>T!mc5BRgu>Z
z^-){MW58kGb<n+U>EKY`Td3pt0bKEp9hJD|nliJ|<C>bZ39Cxp=m3ss+6Jq4q-s&!
zTF6z@=OEC!sQ&;UH53Lgr<}Kn6N-|iTANPM_7~|7>K&)4G}!H~WmUUDf(h=rP+)7|
z&r0hznl7sy)vT<Ht&@Y@eO>4;)Lk=F(2{G(woHqL{MQRrrg)`mqos#3n&jwg!#*jp
ze1&t3PxT_rbtM*~+(E&u7pFa8r|R+uLj;iYD=e{1E27wE$0tdeGz4%>DI0KW--_|T
z;F_qsc4^!msx8<&RER<5rI2^v*Se<>O(c_woe-w=Mu%;!SUeM;Cm$2^YlHf$L7POo
zdwG=X&&8|h0oE%&>mN^A>vj(uh&{kQEnBo&$AVP)W_?Blky`Be=~=>n4_#2#EFW-U
zF^)}NYMNx4Rou4MXhzfVHJZOG8!;f1J=dwnH8`e@Op{KTM)Az1Pi2&+<IPmIQex7A
zN8+reof3C2e;=w5hV8-fGx(?=GcbAa9yuJ;FYZMqz%n!46Z=L*_qqVk(w1pKC*w-O
zO?w-(Yb!6u6+>{X?lFvHX0<Yj8pC6CRCn=+0<?HJ>{U4jK=xSvC;1-hq<)&|?QL?i
zKb}FZX}Bz*hZ*r*571h<M~*3&0h|L|4_uB%rk2C2>251B+;y9~<r^T8Nd$L7T*jW?
zxd9ogD@fqDwi1vWZK~IJ#N||A1J!)BB&2(|KaT}!U0%y@7DONpjcRnM0f>8q^+CWR
zmBO$UVgyh`M$wbiE@>yiK~DP?U0Y0m`%+07x%)Jbi+Sbu)q(!1hW`N5w%3;;;%N$;
z<a1X#ZkiAX<CKL0tDG^?sd7AC(KV>$o*!z{l6=zW*0)x%1eXkI%0w2h?M_(KmWEec
zvJBNue;ue~?O%cR_b3j@2nMsBwYiyK{{Xc{XU`a^{YM4DTt{;g9uF!i#>_=BER!hY
zbZMZJDo>IwIYw&23y0IzTdapZD?_G7C+x7SK?1$1A_Y}Nz|Bdgu?dXgr02iEB_~$E
z-aEf^zJ4IlvZuRjGBZ(M`>fy&6w%Oa_Ho}7jU-7;v;$=$D|=`DXqN?n`6)k&5?hEv
zKF-H=L2axt)mjKaW12C?DRhTXO<OeIwhX(7!4xInay_+DN%K?BbtE>%S^f$IErz3K
zaToa5I}t%fOPUoGE2ETeclQhl=NaOShT`fK5dpY;*RDOuBq3CRM8h;uL`1w}%`=>z
zVemrWv@P!rbDCr@+fFj72HxpK)K^k_asL1xCW(gHI1}J=p6IzZ!{A@Que)`&V5`~c
zf!1KTo+Juz3Fj4IdhtA%*_QBXdq|m=bfc;<dufw!9jp;SWLKTrYNyk3O8Ts+Dv&pE
zT1%urp9-q9jEzHf+<s}6ULE-!3vz5LT9Q}*3z7{o&`CX^_c+MvjMU=yAs=|4bn_{5
z6AWXcNlR^@DcmR;-q~W2vM_$=4O-CLPaJ`cQ1MpU-}qwp6yW|STN$Hh1I#(CFr1Ry
zijuxVVwEF-113ln401<w?5o8`G~(ki+xPcS+UfTe`=l~uk2JOAktCNP9>QDhWE}ab
z>${oWU9lYgYHVG+g<CsmSjP_0v=6gS9v!|yhuPc}j}j+7O%14Aq2wAq@<>+8bYmon
zrAS&}SgudiG$U_tmYe7%!dQ$<%!Id!b+psI@gm_oR5C{-QQ<}?yEz(Yc91d!G^?}{
zzV0<7n#tctO~V8=sg-5--5-HeSC(NUW9K!uPSWhEOX#kK77y+*{{U*8?vZc}GYns8
zI26aXSCA9Xt&W@Ik-r7bn<F}rx8aWK(ggw7o~Vl%B3lxna6sVD7bV&vtJ$oJnX>25
z+bNNqj(MV5CYiYyTB#P2tE(ep9o6-+NYD;{x_q@BQx#o|wuDEYXS(o#oyoW-A3~%A
z+p!~>NM(DXRaXPas$8FAMkkgFIerl+1bL^K1(M)NpYEiF*&@kP^7B=C<&?KS`cg8V
z#R$~pP4EeE(ccD1;B!Pq>;~84^GhsyS8~)@-^F{*j{^B6xQE=2E7+kFl{g}&ibE4%
zoQhY8W^AdWSI|1_jd|?TWr!YDp7FdV4b32slZ;Ra3US>iY@B$dm3MW)ro^~B)CkL~
zc;<<tWelWXgIr_vGtm!h@2F|<lNsfd4y(3<hW_ds%V?*Cqme@^3=nGU8Ks6>q9o#`
zsh=ni%BO-D9%wybLfiYi9M@X?N%TRscfPQU`$v{l70Y#KNVVADjCij}hfy5xv~x0I
zaD16&5=g)(;-1-Gh&VoJ%_9WDi~wnXsa?!@rm~^v2}6=FD2*dcy45b$*4j2=Pk&V|
zi>2IZmq~4EtlN(#z^>isuS?qKcB}CXIT<WPVAbm5iBjy-g9FI_07QO=^o6#IZjwlW
z=OZjg>b2y5?L5-~@#ehW0lOx={PIg9-6O9EC@Ms|lwf(K3%;O5E_aG0Pl`^*=mS~O
zAk;0|Hcyr~HILKot~4z`*<J=V<l#y2T^cb}n*Nn-sX)l+H{*j$odT>4iq8;|Cj}29
z6%t3=ANZ`+pLcVhYDUiYb6c^(lj62$u}^L$g@Y*>1W<9VLZ5&@%s5&p8wd_CYBo|6
z_~6nUg$9MuD#9a^3<#?TWfB}?2C3EXdghC_pKnwUOPHHz*xZ9o#>164S`H~Uo=p)G
zZTOyZP$5V`81Yg9MlvXM8+lWo#Y(c|b5MjDrk!i4TD`r@VbO*TYlQxxdKO(K>F2ji
z*35Xo>bv1{yCbR>RkMRqw0n7wGPnLJ{XU{tWoWB56_Pa1mmVg!9@`GOq#IgLfk`A*
z)7O5Ln@7DeqO-!sAmggY84@djA}&a;O@}0M#Y!g^CO3`K2(W|;pt$GAtGVsh;x^<T
zHAihD{_ov9WY&gTlGdw~>#SAlgjNZ03|R;l<&NsI_A|XwHQgse6l~JS?x`A+A>#tN
z2cSI^(nz|Fq@R^iD<-Q`95J?EhYQInK>q-yo`m<LT~k$$@~TC3*ds{MVu?US7#OBo
zs>K?_iWNr#6jW$J<PIy$XU{Cswsx^%Sd}5@q^>%t1Vdp~83vI79n`zNIjRc`wy|S1
zjmPh-$x^2z3bfPp`1LD2jKOD7f)AP|@Cgx=jDuJ`ZV7DfJ^NBuoRBIfL2<J8Q<Ux^
zmrD6K2C6N!6uXE9IFNQK&rEzc{^^%uGgCI6ml|^_uge(*htOo?#K+{kVy?7F72*nc
z;;VIwHnG%2)I$-sc&if;eFfe?l0wNROfg*VQ?Y#?Q}?0BjgD)gEa^O1?x?**Xf9wW
z$KcUpqB}iBuH%k4*XQKcw@#N3sxip|vHq5_XhRnVJq*^rPl$n%C@~x4JzI(%_S!3$
z9vP4_Fu?|=+D8@d!KF+MA3J`oexD68S#>)IiHVL`K<c@snQ?++!Ql1vUt#MOP~O`-
zGoW-~f(WmjzfpY?CWoj=dt)B!cR3&qtF@rk^X<&%>M(zFOw?qB+#8>&Y`18D`Dzc}
z0r14F2X!WH2G+<m+T3C2e3Q!4{09fRYevXRgZZRlA|bbop6Ml+6$MU7H7o{$)4GMV
zoF$|K++oE_1^JIr6UZ{8^;uvx%o`^^Rbw5(dt04{$L_Mpa&xmm#zyYyTU)yqk|!}f
z-m~3cnZn&>R?j#TQeVqFSd0+3?tr>X5c2Q`Mv_jIi3qvLBS9XGV)|T{639Mdfts!K
zn`%03+>xZLZHf4UKwIiBb1;Ei`)lI0npL@VTrU+&F|6*|;J;C+mWDf1<g<aKxWRHy
z_g1!+%;YHIqp-9tM;NPnSVTvYTO*A{Qas*mT=Gif#-+GSE;*u7Ffc&si1tvZWyvO{
ziUNFQtw_a^Zs?3aG79!;b%~Ax3YbTdHuflpqb_iJrZc%)1`vcicTy}{k0PvMvn6u$
zd=>?e1cizF#SO_8LrXlJBA;V4VMrLKG&>MrWL3nr1~vIc%l_0HtB;S03Q311o_PNN
zATtv`RT+!%N33VD#UrM8^G3>?Em$4A2+{yf2bxREWJ$`Y_|XPEq~p4b$G9FzH8~2U
z8m>zDU?3g5tqn8`(U2E7?A5@GAS(Qdf!xCEyFpeQ(n!;ZsMCBDH#V`!Y{=grMh$V_
zTIl0X)ScHNH#rsDFA+$@T^hyu_evL<Y=i)0QIS-BF3&@u)Kl7VZA9(^>Vt_(Hv_6G
zQ&)^KW0g3dV3pJ6yq`5=JByOvOtq5c<RYmfJfEuR8WoJu+X(H++m3Nu{{YjPjF#65
z1Yx)SE2ZdHuNrJn2H{l6Tjb52Pol$5u$7qa12t!+nY8<)ky~jDF^U!|hK%K#=4w(y
zsAqhVc%+)OML4ClNuBRQyZz3S4B5%rGen6uu^6nM>E-K7psKS2wBoeU$Txjgoyi)L
zN2|vz(M%;V=A=)F{8V6P2A&O7(P=1e%_tc;sFWPk%Yj7^iNP52QOIyGYDXXpj_7$D
zHh$_hL~{QC*65&1NM=<Y;XIn<7fg+2a+y1OuA}<QN4IXSc->si`J;PhjW7<=UZ2wc
z0O921>i+;U5pv9vF~?_$Q*uZOQ<K@I+{YtIKso1%MmrQM50!L`WYaQx8v|m1xb}!u
zLz7Zj+@N_9RLI5!K!KI=s1Md@8VGY{OCg5j3Zt2F<g<0+v;sE$uyOw4sBid~kSeL6
zkOaiM;3>ePv}=h*p&T*4o+w#aTL%Dr)wibOw|n@&9Ot^ht7&3(YSO0iBft7WQU3r*
zh)c-{%{JoIp5dj7_|Sfya`plgZ<gE)QL&557_lZBCydv~H7Tspqv*PC_`$z;wz372
zhNEonl3Rt(%QX$njCzjNVYH6%OK)fUdBUJ49aVBrYW$X3d>6Nno0tc+@n9%5mgdz!
zlpn0r&34vtZ;?(ZV$^M~09ODJ_gQO>iz3=p;E+wdLo-xOtH%t16z(UEJX7ujGB39g
z89!9WEv&8nE}@+HH5T6_+Ds%DEI0BGHDe9B6bGzzLBj-Liz(v04f4qE3O^~wby`=^
z8C~j4KtUp-5C@9(mw6S!51t!6P<p0`J^lEeX$Xx^0=*TQ$8-^XIQp$yAfewOdt|7+
z)JE79!OkgC>Nzd;#yKMfl2{sBJ)w?3^HthpQYFI%IRnLKQMF`_-MpSL`#ZiQhBUSs
zOe-<E2m_vI`<yakZAHFFW6Pc?l;n^%Zs>zjh~3+~4&M{S0qOlYG}mIgm=or%EoHa3
z1%T$JF(Fb%6&6;Lis&skzCmxbDD9%&>BU29b#Hi^fgdW8YbBfi09a-}s*m>ei~cyi
z2k}8|+s|VtuLR9+3<ykk=bk7jrno2DgFll-M`GJ@WG11zk!@4PXs6HEs)4OqXl4R<
z%VhUgGD@Dt<seY8+#_28P?L%ZD~Gt5qje~Qsx;>8GUQoq2f99dQIp(S!E77>yY8zj
z?6zIIyB1#n)E=WRBvGUQ1G}1K2)QL;?}T62UB`JI;JC#`+g9k~6&8;RM`g&tBC77L
zZiV1QUygY+6m7K;Hm$+e(C!7?Za{KqXk@mze`+-Xt&?f8A-T%)R4~St^E;`*%~=O)
zp33@R-?NU{Sg)TnD@`xD00gM+qmF5%y2HL>%@?RoZv+`pMcRDwH4VGHjdp<_C(S}3
zNq~7y2h~MyEHkF|#y>R6*;tLRf_|}1qu~RhM`)P)YQW>gHUu{EZX+Q0p&+)FH}@Yq
zjT^rS{z1(&mfo0YQ3d3}L$yW;r&%Lg0%YJC2ukmHg<b)v?&NEi5G$x7#%Z>`h3kSH
zMM-lanyl16^tTaLv)lU?z_G}xeR5dR#|hc37#*}#KIqL$as=B&7j3YqENZi01haQa
zw+y5K(XN|?^c2!GmIYb{s8@pAaZJ*ZPzy+YBv-WwmMGblve(XwD~m)7{{ZFhiZ}O7
z6}c*WbySv@Bi#Q0F9nYotIK#}Xu~rCDYC&vtO@c#Z+g?`-9n&wG;CAC6A+w}^-%*1
zvKUk@I^w3avX0P~9Ov$-md~ml+Cc9m;=b$<x$3O!?je#eO5wcbf;XYAeYanT`lemq
zS=+8i!Jy*h!a(bmci_Vr&f)l`x&}Lt5&qTlZiD*CuGmBD-N0o(c&kVKe7J;vlTUy6
zucJ#xm3TgMvqkwkxgfC(oYKbTCeMoCjr#up6Y|=7gHlKO_iW$;VIehF@$xSNd$4{_
zpd=prSyPiilxW-Mkz7{a_2$kt8L^NLs<Hn7^6#R~fHjPPsJs(?2(m?aI@~*05uRzz
znH9*bzgsj`e16fut4Kdsv_=DTEs6^UPx^(<KW9Y#`=wGm^H?wc04n~Tll?f&ApKGF
z@i_MYrFe2L0aJ_eZtft8;v*c03Xy^haj(^XM3|CG+X(X?Y@l^pD}U6lPQG(=i}%e*
zH?Mssb8hj=cM~fN5Ng~q%Z65vvr(F+e5oug9L}sr=NL6*plEaI_lX23SRdqZ)oVXd
zH2(lkKBzA=>$uEPH_1nez4XtheJP?@ByqZXxcN17vTEPAaa@@rfp4bxG=7ov&8CTH
zp%58J`8F#;3aR6o$$$Al&_EzcKjxS}P(41s{+(-yIAxo;Bc~ZcDw8%@<DMzN?ilP=
zEdHSA1b}W+AKeij>IbJ(U|gjywLhTmkJ+#bv|(yUd#sB3{{YjctF-?BnsWZ3dT_SU
zbc6GnKGfHM?~mEAEwtcNZ1`+eRQmVRs=xmG89&`L`uEd>VBW`{ywbdL`XZcu%U@CH
zaB5bpjHBGg4Oxu?Q@GG>&4un-0&uK+QPO(r)4*|kAN<u;x#}-V>NdoF`XMKSnlX%*
z6dlR=Hweo-Ug|JpZiu0!1UGrdis$~C_2!$WOl+=h)<`-Ho~u_Es(m|jL5om}pUnwl
zlJH2*FUe^W64@h~c{n*!%~yZcG@?%1_`l|sElW;j$i0vLX({~=Xt#<+Bn3Q*v9*&t
zIp(vm>ULs47V13z0Ah}ZP_|={aGCz<Q7pYi2-(g>NO?`lDyj?E2*H_i{fa6(n8#A7
zAK0J}v}I-5cr_xBfKb8Fj^$<l0FhHHh(}{T-2jZ$^y^Jh#qF*mRt!7(t_SN+NnGf<
zn=+v`%-l9B=ndt3jDK}st=Pe;*-38z<Ukw0GAYR5{Z_p$so<6PGv~mvMvtBzcy@&L
zz-9CIM6LI~ZjeGVE^skL`m;c|()9*`W5{!aYS@08`Ysz(7y5%MO5BAfs_o_1V~#z^
zoXBaW=d_>n@6pVKbqz>vbIPf%n;2D4h@b#GP|<rx0wjxp<06VjL<8ifKapN<Hfd&-
z=^dPSqm5D~j~K=|spDc71o2QN&*iy)=B8JS4DVn0s?hA68zY+WJpL$ud05~w<NFk?
zDB(jd{ECDbb(~>qKdix(B#!Gq*gTRy=Bu?NStBY4{{Wf{D8XJM!#}wf7^|HI{bnIp
z;m33~qZZtZjQ;?2QVYW{k|Tl-Jkdmaop;moX9G2PsYWg|`4o(>UMmIZEm<Yn8NhMa
z&2H^4t(gI77xPY2<jd%nP4Bg<saza$Mru%!>NA8r(=~~$#<d$;?ZF)FqpqV3WxLH4
z${205N)<q>dr#s3?z6VqgaZij1!2WrY1a|TJ`N2Fu_=}SaC6;8!}C&-Hja6y_b-|h
zOCXqKNUZ+9r$cvbBr_moVdk_lu*t;(biUxl6zB~3qxBoo(&;yCJZM@5K0H<(qzdB#
zzN+;mj|KIlkWPV>hC1rLc=hL|X3+HpZKT_=_>tnfdL3r(g!?(#j1}^UnweFUmTrEk
zYZ#hBvBtUl(%Qn8ED==ZwR$#}6|w`j<bLaBr&Sy={gq+B^1sj(u9k-4At}y#s~tzv
z_H#J)&IgfRat@#dc7^VXxc3xp4OQTioD`&Ub+2Ytmq9ifZIo8aY9a)mHIVh(5!$0g
z6r_>T)zn(Kf_%Ri<29bwF4t1kkwD1LHDs0WWR6;DUqGIouyG2?LEL$+gXz^WLKmVc
zzeu!1iADun=_p{mz!(Ougj81tBUz|<q}wzWh*1X=ba96H4(dBt_YC=~D<ld^p3Pcv
z+ilR)Mo<7=DClA$BeZgRsN;=+Be9yXwT$^7RANV$K=DFz*EC`xLc8AriAYooj!jc~
z&8CY+m7>USGm%Lt@Myz<Zb_S8Ojy|56?>bQiyo78ZLRd^5dcWnAJu$`>1|H<YQlN-
zHw9d5O~>xOz4WEQn?tpdURcHsYbN&3Yllw^a7rUDG`J>+tHwF5cduA}#{Hu0d{<Mw
zmNi1efTNn`UZcEQy;<2#P<2m^PUz?QtRd}N?5<@TU{q|GYzmplW1gv<9&188)RJUR
z9tSkV3Wqy#O-{@R!4y1z=i^0kR_ZT;E0|cmako6xea{C4sOjW(z$dB}^`TgS2a(+#
zyAi$Dv;tzQA05>$r4P8Q0scaYz16LQ6$Bi2RnlL@AUk{d%~3ZKt);`8hg=+A)|PE)
zs7!@%l1*imwu~)x&su1wRhQgS5wTfIZ$#ZC^ld8!i*P6Upr)ww_~-FfQfL=O+Wsk|
zh_ZvoBD&syEhirOfO0d625k#Wjvc80I3HCen;O{LpKl=NflA$!Wty3Ms~b(1UVI9#
z)h1}CSC2bE<wZpvroqE9)W5zGx56Bg-xRV|im}gZtm~eDzSHN@WBrQ^<K{EYYg-HC
z*Ux^H)NXWLS*~Go?J@Z!c8^T!mfEx-qBwkVwZhMaP<GEvr_T(DQK^BCR7Z}edvTSb
zkYICJS~ezm3Fz0Eh&1NLGf=V5nqGp1+-f-#az!XRP7Ns>6HD0L9GCT-(5<+B_i<dh
z>40YA18ys%ezvezx5r_)u5)t2TXI-sg7~jl>1+LjakXEWla_EKbWw)QLv=B9RV$J_
z(oHY7aPAYF)EReSx3X)ZWF%Pg7VtT!{{YCcjm=NYvF1GIJmQ*)`6YoF`=nt-y_r_9
zU`NRLto8T{F<vV}Wf!$%&nEzmGg<vg9FodQXX>k;Ln5}|Dv%WBiqS5=ZG5Ev01#+J
zQjALiY6h2Oz$2{IJg`%Pi!H}3vP|v%gVdVl^&yWXPiD7T_$aZtJPPN(q|a{AB#J&?
zb<#CEi+h|~w*+=;<sP#Y4O3BF9{&K-WaW)tC2hUPTwwQB7SgCY&oy0r1Tn*yPDt@p
z*4mWPP3#Dp1G8NBjm=z5^qaFyIcw(t0kn9iL^iQYwUjGR`eoA@u&PEX#^S(UNF;Xv
zkE+ci;_0%)&_$wJq1-Y505u)lNj#%x81q3~+03%2id=!+6*swWYFzdOBV6`3T`;(#
z_M;%2&{j4=JE<oClE$La?kBp;jtEe3S9Wc<V~!|W$7m8GB|;tiRaAji^LRN^M(ffD
ztsS$_@mVV^UR%|5jwTxpGf#^grkh<481Qbb@aO;;;)1ugXs@3qAoMB^Pu<93{{SfJ
zpQ+rK?iN*UGtDHEi=wzew?SH&5ZmV&HBEmd%(f)Poa2hH5~b_}XEh9a(ZnaJXtm^t
zZQ#%PVoT?d<D3=2qxJjwtQI#?NGAkU(6@yKQ~-R?ceasS;TUe`BCK<avTm*hQ*H7Q
zXED06SH?0bGiUNIo@>V4;e}Kj0aQAj_N8xpsceksPc*Q`=PHR4`eoJ3)5O0Ikgr7&
zTt+0Ef3+pF#6lk7aD3*UJ<7W6x5E?lLt|V}Z@`apZxaZYzXFx5ecVW-59)(9Ex&WP
zG9Ua=a0%@I_fFwPNiLYQybcPgLY^o~+fvNC2JUl9sOm68h1`Xv={k^PEUCC3G&ak+
zA04hsyF>!WCO%lIeL_91c)_n)>nkPWM5uBPR7K+~pCT}5q~Q4~ahB^Kwv>qWuq0J>
zq5uW4in?EHLy!hB&1N-gsb;=dAzy}hHFgN^d<)L*L#(44H&V|JyxSkDN4XX@o1Anx
zrzOz1wD(~`b4A8Wk(;@2d2FE|Lm8<fvukd1kwL&NM4sWw{wdPj-AN#cju-BYOYDtb
z11G&--Uz#=YEpbuBGmr?wzh?R&I|b_q9ta^Aq3G<SHK#z!2(B>41=FFU2$ku>)#R;
zY}Kl7w*^SfXbXYvxd8K4!cO=SxhzZj_82E54r+?>803rFVa{sO(nW#;9EX$DS6-_d
zfWxYMTrp=FWfjeWhCJ{nCbm~$kMz~1u*EqQT^z%#;F{^<l$>l>$Lzbck9CeS$P_g8
z=_Gqt=M?=m<u-d{<*D)zyPC7|q|0colFAuz#tQXTb{3)I!F&NgMKoqZj_AQDRQP9_
zY>rJ)6jhToq;Znbw?B0pX6M}4#FhJ|C%ydePvWGG{Wtq%4(hI@{z6Z(-XaTt3G-f?
zOSiqjAW+j;&37PoZK^o}q`8*j$_&Go#~7ta@wM_s%pdI|kg>vn`KrBQ-XCk?xyRL9
z%(%BQ5y9%Kri`=%=l!egvWfC@FD0-ib^rpK5rs}R^F>JuhjwP*8W;rJJmm9KQ7rQn
zX5>?&Nd3;SNOr|>!(>xsZKruX^-_UkrIa{ryl0xyS~a}3sC}`FR$ez&;kXrdq)M#;
zML7V~kVLTNg){Js52}#E4%H?h-zJ!>S#VDys+pX7J5NHb+Nd@$VS!xjjYuP!gjq)6
zz)&$qTc|PaL6eGv?k(})`4p}QmdY(c)=2P3_-CpV!wKb(3bfVQK*N7uRbJ6MoUz9l
z6-$jeC7%7N5x6ErLY7fUl0Y2Pws!?dM{YdSWs9?w7*RuNC6V0e@J7lq2tGlp3w<IJ
z2sj5c6|*4++<)q?G?zwC<-qYx2rDJB1E54#BkG}xKu07F#b~Bv_JCiDA2kK32_YfA
zS*Li@@JnSRx(s0B<;5~>3N$N|!4;>RJc?uo`xF>|b1KV%C@f3po0*l=G(ChBV~_5t
zrm$eeL!6JzY_%=P2?S^8{Z@M5e6!}MmMGYFGPCv#w{Gr!=>@ISWEkaj2k6zTQEnri
z=#{r(N)ClDwGwV+Hr+{i7jlpHqHNPtd>`LR2lqxx8meb@PI(nImN#b@7@)92=sx7W
z$x_c&Dz2CFXZxvbA=jDkFXsn8`>NYbkt9@I+)<P7NmtqfABwf<@%;s5m)TbwUZPnF
zoknqw{_1ph9;76M)Mw-W0J^UOJcMpuaYaEAEN#FH)W5~V{0y?s?4^SD)hM|dgum|8
zHP5L1Q*Oiy*$WT;>e2gK+ZgCocCBQv{Pq)`_^PL)!}K!GlkBM;we@PL#lyBgH9eQ=
z531F+$i$EChMp~qOmQ(&_@uW*5uMQ;=f*0(;bHm@@^XEI>fWl>ukFvi6yv7yDy}VW
z)Pa6FjKFvSs(GztWHJRf`lzE(#^H*1=`f{KE6<U;vfj#nQ_FAeuBS04{{XtBhJRL*
zfvHS?{%UupZb-~s{Z+Af1i1qP%~||BFZ)Qd<ohdUyY++zVAQ22{{YsgTT=CwIKG(w
z0KH!-8BTr94H<6>#Eb}6<06Ofu-7O){!g<RCb#P-fdckykIe}Fsr8g>H(pJj?zSc&
zCP_dWHC?J$t78BUx=CoUSK?3PWcxFcdf(OvG5g6#{^)7_dFs4v{mi)Xb5>VYi4iCO
zY}Gx~BoeJ32hJ+Dqs8<e<YfB>lh(eYa!Xvl{M1Y99elPu=jZcGp58IY=Al@CY=s!4
z{x&t>p8ij>fBI?a>%ZTa?`LIZ=#yQJ$61i5KJjHgn)$-PLJ=aa8|+t5^d*m|FOeiv
zX14pRTFn|ue0VnLbq)Mk)^vAkyS1Anfm=j%HlEuKaaq4g>8%VvKY>PigI0@8gB`i(
zS2Lt<t15j%ZjpbL2?UyEnRzTgY><kqzSQT`09~6!GV<xP{W42{h1~~LT2{d)L|>?%
zs<$4Lh7b`XcJON!pZ!K%V-IGT{;QaJ`uh7_)n4L#!2$TG@l+Pte)2&BHQB-G>Br^S
z%*(23UmlL=pZ!IXjrL>yDnb7M)HSj0*B^XW4s9#py8{EdVre_F+y$#&rogZHb34KP
zeA6aONo+^^R&&)Ks#+GgX!g?Eo_aX0b!VabidB>jIH4a$RgsjR<4nCi3EBtpq|4hs
zQR$X>WkSTBIIE2})zMx!ju>0}#bhk>w2<R&K<w6sMYWDO1aP1j?xtNXHtqH&^Ct+>
zFYWU^&g>G#uVajU)y$|!QN>f*t;9z$$Dhqx$*3$EODve)M>(w#TVqJ#<jL+$wq>>H
z?Lv?S0P|J)9lFbN?w}+w<26(2%~52XE#YC7ZgE;qOU6<Z#|IS9?9Hh7WB&jqw@ZZK
zaw^_f(IZ!len9eQEj$vc4^??@13X1>o+{&sDKz*C)8E{;QlRI}TiPMWAZ4nXN|3<j
z7Z@H5X)M&0+@_xRGdSFWp@uMkf(sr<`l4;2EOy{jFh-yfPc%?spl#zGX&Wz!UPUXj
zfl%LDt<}3k;~|DBX36B#Ejpp((Cm&|SySonO8R=r_*~vWxT|CgS5Jr4nr*$J$8Q@2
zL4_EsGD?yT!KX6?UI!G99i1#*sge<*<5=|#)}fR`?Zx=2ZjaqCHsg+|9F=?nns~)T
z!Kl?_&i6@^@5t(!&GDuN%Km8;myp!dQMgjdQ1kMriw(T@NDfW~F^<wj8Y*(~MK2@s
zR64X56ERdIR)o06BbpjhmJ5v06)tS&dj6AhZwZkb<=s)*t<Ntsg<85+yJ~JD_WtV;
zu4z}6sqQM4LD~AO^M{mnbXupoY?vf%BR}~Vu%(9UeaEv4hCfHeHV}p%aUU;^R)zhL
zw3OZ}l3%g)RP8%zc8v@X#~$G@+z*-#c%f!(rlo@BIqeo8KySK*q7gAtz~}Q$ikl;r
z7|dW}kx3AeEHcJ+`QnVTwRs#c9}(t`S)y&xZ5(;1*i!OW+n9@j$U#4f(0XH1{{W?G
zD>in<dlVP=o-+Q}_>St4?-+}T7-bbsDvXg+RLyuM;O!qo=$gfa?Xt@q++2=}RkGaM
zw*ulb{`K&`{{V>f$S<W9dW4ZjxZ!ivS=su()gi$D0ISFc_gwesxgSSzI=I&-+RZdy
z{{R&~=9tceWH>eQR-NjvR?8qC^=S(I*1t&9eN}RxuCT!4fPBz?mzkC=WDlUjZu3fC
z3PpVGJxl7i?j94Uxn(%1o7G;Z*je=k8+?7z{{W=t>_5!LUjDLTOV|~U8@aA|br#59
zV;uOaE9>1uR+%Tfn8}0HRcf-ZSHj={fnJA2hFGyE$meF7oUIvIV{<f&M@E?*&EPrP
zieGkhotc+#sAeR#;yzzhrIZF<GzC^v1Qq`P%?|A(>+S#sJX2S6tOF7=)evG>lzjd}
z#Va9U3$@LT)?SA~swTH(1Y`ZG-$s>==2go1q}HW~;3Run9GXe421Y{YIyoeFRp^24
zxjbg9^${nySd6LjRpu~d$UAE!kzzWh>E-+u;y4t5YpUDY!*d_J$H$8Kv(nd7!Kxru
zIL<5R-$yq6F-x7qt$gSDn@8;A_Y>^)UX7{W3SJ0`!nXxbl0P&`S-=sbPy-qVQ-)Y?
zOU4OPPqn!j3hjgRYs}lLOz%o8H&3;7+(-jGRC=7cj6fxrR&O;dmurC<Fz3w?Zyv~6
zP}w-@sohT6vC`lQ$+aa-_Xq*=ia4zIQIjf>80=Jbx9cNqehB<kfxJT>w;2pg2}|`v
zlXgX?2xB`sS|<4&E!pkH4~iKhNXGXdee+X2=$U=Eae>BaxAp~X@Cjlhynx4Wxo<hB
zzvz%z!y`g4PbRGGAY}xp<kVNvGRD!!+jGF9?#Xbt87q7P>1q-_MJ@G-kxYw{KB^lz
z*4|CBHbJYjb$fq+$f%o(wh1<$1wNe%!Z54dPcnfbmcTVFwCGuv01#*$NuS)`;2N8^
zGAO&Ydr-R&DIu5sY5H}{GR7ZpEmS_H(ys6ALAIEy5zwRkF{izSfLolq<C?YNhqk4=
zpQ7FIQKI5WVwWl#H511SEA9XZtg2k<QESpfAtDAmHEF$)@qq7)(8-V6u6rv=helwX
za`?y9G{+1lF@Oi+qIec0AzFfoZkm0C$x1|<@--&y4td=#L5vpr%}uCJJh2U+15&VC
zc*|`E9hw>`R_u_xQzpA3<&j)j!*OV-C=AECBGqHIwf5FAz^T^P+8eW+(<g8SDYx38
zJ9K9>yLppel2T6AY~r+<v~n~%;T-v>t!=hkiH<Q=lRRw^3PIwG@;S*iHb$dVQrJxG
z8CU+OYm0D`Z&Butjxe&4IHupSBt-ee6zW_E=WGsij%c<x+z9HVu`6jFz?e^}i%gIb
zOD`FzuiS+m+HOBWnYVjL<KSiO@+%A|=Di%&zSwCF59XnQ-3T(=3?BpPylrB+I8(r*
z@k)tEWa3+rL_fpsnQv(#Hc^gfnN_W=%BOg!wC11Xzl_sg91%V^Bfq#Xq24lTnmOc>
zLn49zt6SKZ&zCtLRpSIg5>x<9T?s}gt=V1dOLr=<lV~0)d({ytt9bKJ?kRAf9CKEh
zluIq5lin$uxj}Z3nbp-DMP_dIqa0^Hiq^`BAR&e})dlELHUO@co^`=pUj*7LXWKFo
zcTsaneTTrwq4c*LsiPHCxftS?Bl+7bPl_nq%Wj56QpBEW<4%}0u$7Us^-Y^<FkB28
zD&9d7kAN#jCPd@V6kUvl@!sQUQmOeAXj?$R^1G;axweq)7!CDMe$#Z}!w|XVv$UV+
zu>@b<x&R?k$MI9z+$n?^l#Kn<>eg^ZHxZijs~pk%KqK*0+OC6l!OkWuS8j3mH53Tk
zhU%rAz#}V`J<{a+Ta;Jb8a_y7b!gQto0GM^yj5coHwEZbzPA=ZbC69_!XIui&*rkV
zOpHiyQ11Rz*%9BI^-jq~$?lqDDt`??2apyZ5Jo$z9U++}A$eM_k!9QnJQ^;|gmNj|
zK+iPLbjWB^ZUb{fNUCG_oPs*2pfSr96SGq^WgrclHfuo#TFLC;57%_{21y%{{fZC}
z&EGVrWJw7Jk2NTO+N=eTxjDsDLCl0m{9>+e<Y^IyTvY>=Z0-aws${tc*%~C2fz>y7
z-a>*`6vmG@K56LUIW~bv0T=|@d><8MqDzTPehpX2hirk>Phfqy!yXMZ9Wqg^*rh;b
z2Ac|^v**o5qzD(w8Y2j?f^nQx?1Yx`B$2Y7DkKq-K>(c7qKq_UfdJHN35kP}d#H#*
zsHS3I%fPIp?5{4(hl5*df|DJmvN^1tu?Mkoc7s)9UP4AwWPayZp+&^|^PVbY%(3id
zBNV{~0W93rWEN+RMauW)rH<Z5gfMIY;-QTKIX)@=`BW>CGf;)Tlo?f(ff(r3y@6-X
z$b5X7&DvhFIw2*uc&&zspx$4)xD6a<nlj56#@iY(a&(E5Jmms$o(*a=Urft(fJoo*
zSDGKEAd62W;|$CGqAOLP*{F?nf&SIa&8?Gl4vl#5rTG-PXQOQ5+wKBS=B;1SI!vGm
zBMtu5t+lm?fEaT}C9EeKRA#YDE=Suf+ZEu$fA}LtDipxK@k3mC14n=32S0&bA#GtJ
zp<Xtqqv8b}^Il0&IVJa?EEX1yHQT%HvpS#YvWSYsAYZEPw~$1u@?4s(k_%$^kONcs
z86x;&EKrw&gF)yGQr1BvO}qo*k4+0uP{QKffXCH##J09p0Sf2nRPCmui}H*g_N@`t
z<X!SrED`X0@L@KS?3yP7H2H?tQXPPAAByf*{+O`22Ys?<`mD~S=zDo3ESX$%D@;8?
zJxLY4JMhkPa|$GC%Gu9~2JJ({Ks=h+>mHQ5iE#>N6`Q`(?qHK+z`%XtwL?5IbjqcU
zQSBJ(v}K<mf$>7mDugkV1o<^_s>I7HY*h!&YKCQPjIwavF;Yyfio4Q^xJB-OSlOQh
z^Z27K!3PHvDvzG+*s54ghv;ueO{-f8rW<1HAG+;+jf&q)uojsBEpX34`s#fci#f*o
zbH-1K>Uw{v^oyw0MU(>Hn&ayhX-lpSrjg@{{hcn(`qd*OI2Dog6pMMNZq89n4F;>H
zxZ-jD05r?H{Wc{o*9-puHG$!q@ND-Z_7fP79}!YA??Q~Bf$SR3OX|HYEQs52tiP%~
zeQ_KQ9g<@_X0C>hEW8xUJR{@-^>awExAy0=0cB(IQC#xb8w8a(6iw!>eSYlMQ#+D)
z$?AuSX)*xlc7E%>rpGLBN=)SEnw+f#u4RY@>1as-c><vz4#f~BRA30mJ<~5GhBNLb
zpB1IDkc(@EkxJ(qc;b>kugE<4p)~o#lkLD6s|Se1mN+@_N<t{)F-7f1%Qa~98+40l
zU~)!HQ)yPOCD47nb6Trtwnw~RpCYqKJ;prKYl6br+A?HJG77@__g^t1#25!W3|6;T
z)S<G`+i%5)=D4Q6eI@PV$mh=$1i_DcwkDM$&#9|O#g2T}SM=L}p$&2kCBk1J&zkFA
zmMTi;oQi1hRq9dU8=W`;2R&Af%=`h)4l6fgKwzS;td8NF^;V<JWP4a>h|Pco2=i8!
zQcG?GZ_NR3xkv!xHFF9NZA)@#*+w^Sl4$N@=(j&qQcoB_O9Ch}X&d#yK54ghe%u9-
z+atv*EsA8f7mVyuANi<1ZzJ-|Xk9vRfT<rW)tX2OVv69;l3TlAp4&tT4b@L=aqgxG
z>YF(P5kLw50B<kg)NL+GmQB6%To3M@BPckiBavPuaZYo%W`gIFMe)G>+4D%<b5Z9M
z_`Y&r^+XGv4@W4Ec1B6+yq`11Gf~`tvXlmzj?oNY9QmmJ$eT}V=yc!=?i`vO5S+Cw
z!!~kGb5Ja@0!nfRbt{5yN?eV&DbD_Q;;J<!GTO991QAw~$|a29Ks8Nyg$JA(U-DlD
z(@h-PS9vs@O^TeMJc_;2FKsWZ%J6f+{8b;X-9rcBvKnT#+I{fa$J-~7Rcy1<>9M(D
zPs!4TmFzina#Qi*oYpTqfs#xQ?u65I)SeU|f{Zy8J=UWd?GWd2JqoS54$!OV8phN|
z6Xc%gdwE<gFi!5x2e#(s<J@p|4;5=~4%qidc7fFd7xam3S=J~20N4d(Jzc09O*&ne
zUB;}hwP@nrf$#@-u663oSl?WT(2|42M7J|eC@>Tr>NJ_#o=p{IB30eL@l|lX6sada
zRXP#9anR<pMVgI9n==U!QMQcv^IOXkGekBy9B@IdXQ#Q6WWo66x;B|8OLW-BfzE40
zNSI9T?2MDS&vgsBn{rEhkBVW0%_B%(^NJefi9KxsoQ0K<ghr&P$nJ#IqeE*eDiRM=
zMw}X0Mlp{y2C*Z@0YQKVH5m+Xw_&-YY-7a>EJU$>zNo2Yc_nf{<n$^jgu71l$JHnX
zTe)l@UPqdSedc|n(QvYucQD$+fr^e~Iah29^-zU|gSj1t`Z3K-sx(b*#4b<WN1{pY
zNK_Aq?9|${Y9nojk0O<;Lm0c_&PXTVC_<>wk~t%)u+(5Da|4B{=ZqvrV{=(0!yOj>
zhk(zjFStO(cb`s7VXz@Nz~a7m(Qe+_QMHP(^IePd`&Tzh9l7PdHRC_2wAnI8dg&gQ
z(KA9n7umLJovI#Kjt|{eYF1|R4{Ay^TWcs%*>lwmV=9fmAzl^|+N~a{{Gzm3+}nuO
zRy&WH(P``c01Ib|Y`)doD3gqNr(dLTL?eNY;xUm)lOvm<m9$IkT5=9IAF5@wlElS?
zoK;jhbb@W|a=`s&t!(YyL$*l{-@2|!yx9FBJDYiKi9s$S^G~qT8UvW*C4agm{@f%O
zh3nc>bM4HGarH>%_Sb=xRoFyVkwV_whie|Hht&5MCPj|j2`qc3Yg%Q@x|~y_qyT!W
zxFABRTmnF*TVYb{3R`(a3tCRL=<^((IIF!jPx~+vjy%+Quw{s2s*_BN1Oa&lmQ&n_
z-xkWYx4uiJlpldLWU(8Id@$$kov3@UoxkZdBrFyXigL!I<pOhU0LESkHW8nyGquh#
zeyD4=NcmznR($$_Vbz2zTPLAKVwCzrL*!_GZ@NOoKr}9*L__%#kE)#C?uUGXVxK0V
zMz@)_23HgkYnFhMOsRnC63kcw?fA_aFB;s1c*!+A)c(SO5bm6jtg(lX4AV+#UJFvC
z$dW|&G32mSJ}M~ejk(_$%{KhVW7!KG-->m)xm$co{IOA80lGuY`^WKh$@-vH=-u6W
zaj6A+rHUv0$$1X!_@ffp5<?;=RwKG^j77Czn!VJPc9~<wd-|toO)T*V1MW`gx|||t
zy}3CD?uybOFx!aGaYuZ1RdN>>R>-a{;Yole2kM)6efT#HeyO^KnvFlY#OE|hLOp<^
zfCWw!G$+P@ZLJ(bxtgc9m_huPclS_xjU0s;u(dQc(oORmtu&w5wBIkI*A4rQKnkXZ
z+3g%5=A^p~8ewQlIPQJZB%H5`X#Jutg00z^iJW8(s69_m5y-0o=kFBh=D2x`YM5WT
zMYoD2JI-ABr;1&$biyrKE11Ufhd*>R`g=ftXr+QU;`cHtmr<HYW!o7fbw&|#mjshm
zdfj4nF6WU`#dx<lhX)JhtERV^QA!ds-CAie$Lz9|0kK*pqLqod`zr2Hz1CG6R%=os
zDam8SYHv$NGYnQ=TD57^gOE*a(!O~Up9B`OyUIPFNY6N{b(N536<4FgEn8_ht01MF
zvUojJ*=u(LC-z2XMwAyLiZD00J_j{beQL@uhm%Cv!E~12;yG`lNhrpxF_hnsnCznx
z`+)rEQd^^Q?G(cO(<~D5Iep84{?vc(AqSPgs%0jfb~;J&h@`MtWDO*o{R30PlR$8%
zD?>TBmSE8@#-+QOcwosHG>>!Ph0^H5wq1h-!U~)stYw2_Vun|70aa4KQ%%FNh{EHF
z`+@R8vt0sMxH!d7GHj2A-C8TfW{uQf6Zce0ZcMBhvG}a>T#$rB3?<pQ`lw?OyKQa=
zJW(=*T%gWqX+A_@a78EyuA*gN%6P>(AQjX<o@t*lfMAhR#TuBmlky%7MZpWJ1!hoC
z;*Vo2k?n8fDyvDIUNVc2MO#80mvbBeS{EpYh9wF}kl>1l$i_@YbDDMH05@}rWu$v#
z_jB?3rm`_dGB9Du9aRPA`0igkb6R$fd+t&yhgyIv<0m<)vJeGi%a%QOq*(it85J{t
z3Ie%|cZ&3cI6mLTKciI;i``xV3{&KoZ3;h-^GrlkIU|Z$l@Nw+;(Dk<L8edc+l|Mr
zD8bn0aQm$Ok0@D+vG|^aUMi7-Gqm<<^($6Fc|nV5M$UXuEE-7Fy!}w9ea6{>K%=1>
zA|<=%Q3xZXf=uJ(_dsi~W-!3yind`VcsLam)w<7W<bNzsmneoSlKV$_sWJ;;$5lVD
zAf1aGpQ;x6VYqRFR9z7BAM%X#O^sx_xDdpJn4BmSE!^r95OZ2T(&_D=)bR^|G2=B%
z<}yb@{XG3OXE9!CDZHLS9~IW0LASNFjbn%aJPIpJmIbtrVFMsythEM=StAx3vs`@B
zl-skRjw&fo1Qwz_E<?Ak6yHwGkK~(q;;j`y&}Dpp$Eu;xp5+_~Fx>d1Zm4*xvu$V+
zW(15;p4!IW9*qO1JZykD+HplBn3G`SebVmxCeX;DLb)QFurm{idP~Lt;)_8Uhd3Fh
z;fc{zED0eh)CBr`4=zaHRn5^=jNyJqst~Xd;nxPMm3b|V_$lJBg|1AIKU`w1EMSpU
zgSJ8HgSKVZ?}DdwT-Y>k8mT<c#?u=jXo2C!0A_`}fCR`+YV+DcYAc&yM<%Unf~5H}
zQ(HWzKHexjLrJxnJ-K31PvW(bLl_6-C~0i+H*N$TjaN-{J*U~pJz4sH4b&`c4#)2T
zxaX@qKdfm|ZI%`Yy)Yfu*I%*t<%VjbR`m6q`ZCQ5xIbexq8gl$?5bF!BjU+@&v@@s
zpUo&ywES~j@7CX>sc_A4rwxtXE0ya$kkz!SgB<qmRDY-rD{ef|$Yq`>M9LV*92`}~
zgK;ST03PQ2b3+?h7DiT5XuDMq?IVWIHKH*pY}Q#%C$qbA7GKRR!?da$ow9ows4PP&
zDPC2FG(^#|jCeoztI|NQdE$hoBV)R%^&5E_)tj)Ut(jFu?yA1+UU=OXIPZ#$a0@c+
z{MlEt{{V`b+@j?jvOeiOt&Z!1lTk?*wz=L3`lKN*xVb8)KSqX;T(bgrs2)~E$i`{$
z%%u+O<oTs+Bdi;3#|?v;(AqFR`ky1>vhfgZL0tD*jVoz6aseJGpay7kc)^?vxF)T|
zsQuQ^a62_gV{z@t?hcQdyOGN2&2(D_99DUDb8<b?W)Jl@QZcvf6R)vc;zr%eRqw2>
zn&VPp#PDi_VX`VNjo4$h2y9sr2VO99T|3a4fDxm=I2Fw!1b16MNH_TQ^hZ<?UamW_
zop(u?O5wlLT3c`c4xLsHLc0mJHaynPPMoR&Ijf@2C8&nh?7y@D*L!%YJ68<h3t?)O
z-ay=gkGiz7A=@2On6I7-kg9|PZP}>`hK#FFa4}ZKXp|(HN;F1Dqq$$&+_qQZY0=!N
z9o4+{(aZ|Cs_8UvtV*#I$(3Zp<;n$`>##Ygt!-9V{mH-DrGrk6Grf-DD3gu20|KMu
zsyO!cM;Hn=j)gr~4#hL0US_}|i4&n)lf@?H8NsG3n@Px~mj!sJ3l}*XvrRH{ifoKP
z9MnwU)CTHfrIQ7e9Mml)#3~!U>M5UTq2_YKIqIMzE=8S)jF8+_4yABVwoXMEdpiMu
zc&wkPby)2!B$8GG1B$L(8}!ikB%+L;tS-gP*R!4lV{c+*pMk|5w?_K)q%GN~^#Eg=
z3NmP}k7tG(Afh`=)F!oHu`%bdT@TaH8R1E!lO*$lT*fwH<12zIpn6X1TttzeV9VyK
z$h?W2I^fw^AcsEa3=dW7#z`0_wszF^sI3qV<2a@}h@}COa{bjtS~FVxjkGXI-rfmZ
z*DAg!le;P89;>Q)(n2)3W{<E4u0t>R7`gsL^G6xK65w=h%;rpDinWk#!5Pg79(HW-
zD#FebXu*Gr3b_`Rb|l4h79+x%+uHA9Fxx>DiLqt<%5FJOYi9%QjC+0sXvPB0)GquM
z&M9sLQ)VDA$IUk1CvHEHq`4m4;Dd%V{e=&=NiE00s=Hkiv`kw)QOMbts~mAqYi#Dh
zhBBkg5*bw+R21dB{wdz++yuWWkx-?%leH(sF6doA0|Zhq4M@iR6Cm@7ZP&N$GP2>v
zRI?q5ts8O<O(o3H+z)UrAI(ApdT_@JFfX{D^wAomd1Bj$Q$Xn|qJNO<$g69njicP$
zs2o%ZGFqZ6Xjq;qf@AO`A2p@d5ndw0f#6nk;{qm9aw{a)BcYb+=0tB5*#7`cZP5}t
zfyp(-?B!Tr<;d|}GxXnD3p&Eg0Q#>#>$vAVGf_I;l{m*Tv#@G6F;0tbj&aGV>zgqQ
zKH@)AWuz%`-?#*ILEf`TDUDko9&6?h;#7O2{wjczTaGzaHqPp~vAvFG1f1rCk4Lna
zhj0T9sx-8c*6%2!tvuE_E;}N8@LAr$CB&hv_{WNLvR&E4GK3RJ72-s|vta#Hli$N~
zyI-MMo%Kbrx7hm$8dcvT=kY{Kt6ST!jZRqoRa|AJm)nCodM1mC#>rTtWuM739AdjA
zTeet5w&|7g%@uVTMFvA)&|*04W89CxsMhj3=vYSIA;nUn+O$UbDz5cJY1$Wj;PqEJ
zO!2x8Zxu@piUrh@%G<NmPo>7u+^~(0eO0oI65)?3lHj}5-~0gLhk*ja?kjF`I2G+o
zOYCht<WPDvS0X~P9H{D%T?@7~U{R#T?b_=k6TqiGr{J3LZuks3t82SvCpgVV6s9F2
zT;Nlz(_IsZbP256Jlb>H1B{Lcs=rEEHIC%Dl)pS?ucHFtz8Ty3q@Lb5`#kxgvrBHm
zFOOp{1+op<82r>K!c^M{&-FrT*3w-kwv1IrnoTwdZ4>-$xY5|Q-SQao<Tfiuhy09r
zp)YS8CJDD7_eJLBahJ-}W;jHGGC4j?CtZx4zCta`5}<S(<C=!zQ5Yw1BBi@XZ6Mwp
z0BVx{Ij$@npktHOG$FZ(YRd~_9o|c@z@k>}3t0qt7(G>2r>&aa#f<DrnhJTOI+%&0
zVwvF6tXC$lqRUd?)ZH??d)Lhl3HI_vaZyfQ8$7N^`lzh049o4R;QnbNb!AT<4vI~F
z`60Li9Fx^uX}1#o#b0h08LHhwPf4!Fx;)Wjv{3sfc*ZMCwn`~3#kQ3Wdx+)9IOD}f
zG}}sr86Q;^n`{K^CU1%AkG&SKra=^vNjgD~50V)!BKALy4M82F-HrRUiipb$(riWs
zh%y^!NK;3Z(Hfy<ja5Cp*)?1He`;db$TXO`c}&?j`l4l&`$OCqIIAL@e2UXxmr{Z8
zBl@Up=4j?5{{R|-dxd*|tIaxDLmz4}mOOD<6nN-Fsgi03c?`a4qV)s(eAVMyNgbf~
zF^p9nrHIpVoSL@;xuh#iGMZESdB%R~!5@05>ZxZhEx_HLD#qGUFs<NL{E*=d7EN^H
z+yHWDt2dG}yqc(MfVTzMd_`K@f84{kFbh=VgHbGumq9Oj82pARVQ&nIBseCZSfIwk
z%Nk$WT&n_&@T~lJbTjCE?eba10^pOz1xslvT*f!xXTheMYiy~QWxq7HR#LQCxA>Y@
zva8S86Rylv&Ld5+f4HETh&>)?$QfjevHoMlLvoT7-{(D7+)t7N&kG1kIN<rL&D-s{
z*!&N=)XeN+_E=z^t2?aQGB?Y=y00Q24mU_h<O-Pp$Qx=n!!ZR%6q7WfP(y9oRRD7b
zR@t0mnrq1(^}*-OMp>3p?D@?h3d}Z+d#FOwOp-XK{F=UnebsCepBSvYp4Gff{TkB=
z3}+CZ^wscoh(gYxy|>9CrIQYTs%;)>2=Z<+>+&^g8Y@CWU>tEyx*!OYEL#8>`>L&F
zcL5<CRg%Po*hu)n;-kNbUevGhJ}D*2A~TRo0~M1v9STNaw32_JQfvT~W!l54ibURd
z9Q9NXWJTSudZ}GQ;C2Nhu$ZG<p6k&_i}BAUrI4P{BVUse7*>`7CAN=g&PO$!5lHF3
zr0T8xKP+fWF5l6ngXD#cs%(^QVfd+Bzq_%x`mYF7+yR`giY8b_!u{f{a!Ce-;K#I{
z>L{XkgD3I^qK(_!!8}oMNH>$vq-=&~Shit4<Ul#@sP#KyRIu;OZ}l5^CroX@K5CNo
z#Q@r|?$32h`42}unOrtGqBP0nvAauzVjWI+tp)y*k(kq(qMq52h+sh#LsT+8f&Df0
zoI0Jn5KR97H2@r%=~|qNaSTe2jB+dFPfluc=@+rij6&y{?w*wOTzZ>A&i?=q2b#s3
z671EAcfr)I<v;W@7IF4y9R>$Nu96oA?xl`s?raL92FlfTiKvS!*0-GPQg|k_O<Ul;
zE{(0Z#tVuvITf%67@+NCMo_?p4d#hR{zINCJu=kOEMRVcqG4tSZ34Jd$nKtw;D*gv
z9vHoa&NmUqGzO<A%xV|D=%XQFoZ}S`R4cvtB8<~$F=;R&3Q4ik;zmU?OqtH5u!0m-
ztY#ThGXass7inv6yLeH}RIxiv7Ceb2nJxz0gG`YOh{LWZu^RL#uMr&@(^h7)Hwu9B
zEUb7wXonHVixh8zMPKRRn|R=!=rpi&kxIAmJl1`(i>fZ&8FOI<FKq(uB-0)h5tGrW
zg1fjRa4XU-NEq&{6SBTrJ?n#tsnj$pJJ_Bph*iPkBvzTC&jZCm$Uli*YAyizK5_j{
z^byi74ehEQ+n!qks^`;1ZV(8D=dZ0jS*IoTpC!AXbY(fOm_1LYf712It;YZ{BDz{U
z`<}@eb7G~r5$LSj{kH^F+uIW!M<CXE8_^u8xC{!?tm@XuBuC<V711)4(%KA=eDn=P
zb!O2<8a2qrIIB>;`2@EW4d8gr#BArqF$^Z56APvQV9?EKBTseCN%2}eYRSt90`hyR
zi<S1!7a1Aqm7o+CYDA^BaZyK&z_9qA6=`$>0>OCtp`=h%LM|$hfep(xP;r{nXnST*
zTlr$MrY4Q@$CFw=OIy3JY~qE|E6XjhvrVSkf`T_gwE3yETZIvR-ZDr77^$R=>f28@
zSpig5UM*%PzXkbh^;I0?n|5J+RSTx0Az(&N6;Be9Yias`7HI_O7y$VdkckK&(ML}U
zxQZfd)|b-XC)!Td6;Ox?T=B(N>65H84~`CdqeD(MsNU)Cw5x-bQ_XF3JJg*Q1ZT*u
zf2SSbI-U(&>30GNG9LsUszuW{I^90x=^ZZJzC*w@XKRo$4QDiq^qv(Ubytwin{$i`
z(4C{sN(oAfn^yk-QxQkT7_Q;fQEBrl%fQI1=<*}us#Y{BqD(PO^us1jAAv~Eg}^lo
zvThWq7D`DW%)sCdDj9#598*ci?vak}d{a?Nc7#96C#q>9lT9v18KyQ!%{3L-?Zk``
z#d+CC01N@ysM<oSijGNKCJzAA7jj5+tTv8$s2#c5IHok7NvLF-EuX3)wuj9sfsV}u
zEaR0t3iQ(Clj5rNOO-*#RIh^ATYLj+H$oLBcf=0sjQ*kegZGzzwT1o)epof4{YLdd
zTidm)51ANagItbD9!Xi)l1c8fPF8xaOVRCK<cqB~=*H^XxU1Qu_Yq}Yaw{QgCei_|
zN2i#~;vx$JO&J}xM5r0cswU!lt?#8R(#inR2HZtMp=h?~caf5R6{(KJQ9(EYgr#O2
z*>`Y+wpO^wM<7v*C5|DVkQE42w`|7wQ`n*P4M}5&#|at7HB6W>L&=f-M%5W@y@<eF
z+#FXsg^9=@R)^LTJ=NLVcpX-46@w90K~guxQYN<<kc{_72;^=vj8%L{N#lYl%E*^p
z%=pI@Y6xDR5DYgl?zcKH{{RL!Bo8%*wc008r*=<OxUh_vK}W&IReTtwgmM*}ZWyGz
znFP6RbLOCV)H<o;`KJUAAtYz$RiPgWlEASibw5)d_AZz>#W^FDaKL{;miBgQM~Uz_
z?x0MpQlw~9oPpreVvS{W$j0M>eyOrq7myWvsXWwbOcNN3WO$@vgk4+59KPB<P<BlL
zDw8JEY-g&oj3khl=Klau;Lz-mLzrb;pAGdugql!cm0&Se5^Yowe;|3FErJ_`Slf@f
zw2~04Bs_uQiWwRGavUk!{{R)4B~+c`uNA4+8Jl(&3f)#_P#bd|2(0-RBOn#p(-&m`
zXMih3>G1JfB$4hIJmRwDnI_}SZ2c8Fw8A-*5NnyOZaAB(HR%$05<9n`LtN>A$j2m9
zdab;c*M(FZ6G`Zs*rPy*I23i&{8tyQ9WjH)74lt6Ydo4q&@{H=MQU6)AQC~VIB%l~
zjiHV)Kx+`J(H6GhzZFko9reEHBqgbRwK(0Eab|)WaMLvIxgFx9Nv)-Qf;1E!mvubK
zLoU#0vtw?6xRVl}%~K@PrppwOYhc!pbO-xWCAV3web~+xgVXKh0G2tZwJGnTZ)(yq
zN$k}sE>4hnPlj1u!UeNNvSo;<bbBw`#6pa*<j~q|la>N>CA`*x&O}w^1qnA5w2X|Z
zBwsuOimtKRE?JlEo9dgyg50pk{;HEt)Mg9ssvWg4;&N74qj5HJp`H#v1XMO32P#ED
zrfLgyZ@Z?&K$l+C(E+nT+Tf`{z6%@KW4A)mHV1W_)%6GcqsbB$Ul^-vyk0{7=UC!_
zvxF%2SY=NfifA~lSka4#Rq3rt?sY#54Edrjb%eLR5ZxW1^lI1kP{4n}yMFL0lhk@V
zT7go`L#<yKVxCT^44uA9nCFIgej_X3iWxn~Wi5_AsoG+trKE6$<PJ?G=A&$nw4ayO
zDuljGE=QWBX%b1PDIz_DG5k}-kuE2Z@$@KIt}W#vLxGB0+2xiJmf0lv`lC+jg=)Ga
zzlkPIjM)_(ghn(fSK~tI;^y6t?lSqSMx6p<7^5cRz?)ec>_)~YIaIyUA8c*Uc&K#^
zLKJmE*>9>L^#zK{-Dvm&x{4C|?0DRHEUqK}0Ji{rRZLO{uJ$S&$MHq!nuL0UJD0^Z
zq*T68P+-yK95T46HpuURE4+?qSY(ZyBOjGmYj5qqT}F7|Rz|_^XVp0cBkwhSJbU8U
zu<w&n*1-pPs`;TZzSI0gO?Pt86kL}Fzr_%alEwZt=M;%Js}1s3e%2*8hlZy`1d*J`
zOMi7D#PLd8oFA%s+<T4fu-Z?WWhpSr^g|r#B`dcggVp9@aErxQu(p5`k+}J&Hbr>Y
zLI@Pmbnb+-Q(D@SnK6S%6(YF@I63C5ZnJJSEmx#Mc{ej-2DGWUatNhmz2DBn5OOhD
z*<(ToJdsxd>NQW>7S9}2Wx}etB90c*rYw_*pT3L<_hSN-6(A5PHHixM)@mzu-hgMi
z>7@3i3m+^gB9uI4tn`>;jt^;{6<G3!<O<YOGXv)oxi>h_(Mh{38rcJf5)ws7x041D
z0i5xg0y(ALJ|v|Uw|3xgK0>m%zdV+f3ub0e%-q+e_wCaN*~M1eU*3QPykw8MD#CLz
z;^QH{s<#QHYzgvW6exDV=9YJFXJ{PI=_;#?au1qp_fmpDOsV5F_a=Oldleg4jJ;Lr
zakQ%C`|(y{LG7q238=2Z<8u%8pyR*{#8`_XJpEJ?M<DsR@5K{w8>Q)xug5vBN`2<{
zc-!?^=zyy5JVG`Ha6Qu{Xu$viF;7IxXKp$1O=Sh}M->Q9Wf+z~!Lg6J&|1m7DIe+V
z)e=U=FY(|Sv(Vo#m0`OFnztJPV$5V?wQ>2QY}6Q-$Vnrr4kG6+i@@<uw3WTXxZocZ
zZ|H-jFgNiZDu5!%Fb;7=OB`Xz1Cl$acCUU#K$RMt2_4xO&U!UlaUK^Tuya>>s)=o~
zx!%2EsJ6+pw49U2HJWOf1UsETW5?YL6B6Kc_fx2G1Mmo;fnq{~%^?E>kF#o$e-ve_
zq$&x{^FhS~s6!#o-Ac?h5=jm9M25FbluZE7VyqY`h@mG0R%6rCmXly?{{VG*&ca;o
z!15~I!kOsMM#CQ=Q%Vs)W#PW7(Yo$g6@ETN5F=T#NWzY&&`4)ynk8t!1aNA2;X?@y
z2JBNlHva$|WD`ZM7Crkx{^?q>6jv7B%VoPJgS^sht`xIv+((MM)NO?E`>NP0&MJ}E
z$i-E*;Qs*4RNzZ9SK2hl+hZG-#wuMCQ@+t|6f!rq4Qb4Y3ZpQ|9gI|RC9T)~q7Tm%
zRIymmog2?b`sUhbQ5sOE9TQqx%gbFtE$HAl@(ppc((KM!-cu9!#UH2LU)naqF<?H;
zWS*Jd1@X=HbsZ<wS6W2EIhe@?e(R)Y9;DK>7!=2FfzSF$=DvG->1I*9RAOt$97$^{
z#^3#>sd(D$7AYswKB|e4fG9x~E-~W1Z_>Se>Mb!qZPO_K0Nq-BKlPeOz!$cJeUK=7
zLH8NiBq!Vibx}xJ<N#z3HG$KASA9El0F|0H{EE@odf(HRxc2WO2lCXf6B%VMB;18j
zKIph+D8T{dfVS0i%YZ$rxKIB8%|}5csUd`j$Lty&+eG(Jd~+}uK56SK2EjD~IunpV
zsR2{XHr5TuU)(FmRP-t~k#?399FYygt79~)5I^eTk)|?eeZDrHd#^I{gXWl^Zrhwx
zrZVFM^-HuiH3%8pDXtZcG0jPX!m!ONCdDLF+SweEE2oSwaD0mSoAt&7gViJ{$KEUG
zzfkLTnpK>$Tu7_6PZ_V2{duiPuKJo=kT*t23tBXo?;>Scr}3hyRDx)MP{EhYSZJ4r
zKFp+*9Z(VO8CDo<@F=@T@o*W}VXo5sm`yUv9>A(FD2BEV8nWYY`=}#W{k3cZ#T6oE
zHDr^;Pa^`{TEsvN$*N6LOj1aXZaXz}(l_vb9%_?cxjQyO<R7s~Ic)@;D=4)~Gwl_O
z0!P(Q%7q3;W5rtBHQEw)F%<1S&2D~AfPRf;aw(Q&MQxmIe8M(D2=iM%O<BenVgoKd
zs5F8bL&~pSDQ$HlYjN(!!+4=>n6l^I=@#CvzWuJ8ZHp0s)pKS^8b-JoP%8Uezc*Jx
zOQJ~44|{QKJ)nveQHn8&eVQ;uK1{BqdviG0<dq#zw&1n_6={EIwLwmE-7?P2iOC#M
zqgqLHENPV)6(-VzGLDTMCAb#(@+cQAjOY8JkWChqsghO~0ddt`d#e^DJ=PtkC6ve7
z`K>LD;#}IOe!#(`lJaNGj?uI9_NG!r!5a;3W0p3w_d|h^T#HD$Snj=#2l$g(+bu=X
zGYp5v6=WX=oYm>7S4Twj`$pSNa!qN9fS?1xtQVzkqcMTx8r$1Svcw29{tjJK+7*Zg
z6%4Ws+*74&bwaVjWYI#&k`uIYJFgGoaZHu>C?=V_)Iz#USL9@OO=&OyHQ>i~MKQS~
z(GvcMrtgn5JkmBW-3pkJT7tRvOERe9pp9QbOv*4p;-Ps#$JIC4l=dl~wUp!%YB8d@
z2fw&j!)-ambFW=`rYks_7>P3xis-ZH<&zDbd#typJrifBhIU*(*sA24Tpb-7!yJj^
z=-^tF#J6*_(y?YI%>+2Em#qGoY7n`5bGkBo)@M-kcByC%V%!JSXK3~K^W&2EvVsLH
z;10>IxB7jeMv%uW%aX%2%p<k9wVh0`y5RX0*8MMbve`rJJ^%n3h+Ryc-VIz4MQwBj
z&w=yxQfe0>FjXB&(`~O8J7z%G6${%%ER04#J}5IRS}JAKLpcc9B;u=eB$mw{W@6j=
zKUHLO)U>!~lN(s^Pt+}ccVf{Wt_>oq<kJ_)guW5Y3pNf<lUdmjgq`(UX|(TV2aU%g
z?y{PKY?Orqiq_JjQUfmNa62_=r!xW<3Pn`59#BV$v9?EC6({|vP)ke{nOH8;Yf+;+
z7*w<K;<BpkAVegS_gW1koFrKOJc_y<fW0AE#_&JdrgAr_0Dvk8nTf$skwr$|<0|Sh
zfALo$62%+4E*Ruc_h7{wt1^;EIHFjXij*}Dr#W~Xm;!dx36_Ra1gs=ew3=&_5{WkC
zo`I;PEa8c6Gg2m5QO_zo(uzfQ70l6@Bgjx`s4x)DOAj<$rH&(tLRGv_w;Q8<m<Juy
z2wkvOnOP93RlU(fLA2x%%~I)-$Zk<c_~)v*yg&|m&p=T@MOL6gz{$6o$!baMNys9$
zSGWUf@O)NhRUaltT%Q$AK%#=@C4+^c^t*{*nIu;r9~3>~dtfeqx`r9lW1hOKlZuk2
z0Lef$zgY=5yGM2XI@M#96`AqI4r`;VG0ALZSjQtd#c)5<Lp$7~ZaXtw>fZ7=w4kg)
z<!~$IpVS&y;naM5p6}B0%O)hsyG7e{%<b?es<62>^GC4&85pYcwr59e<C=orU6PcL
z;X&@W@19Z9*~x2bV;oWP*s7cROZK}jX&?wb=<_m8>aj`j9n_ZrqX5QoJkqH+yBQ&x
za%wy*tPkW-_ipy_WssBiQQF;zV%*@5_NP6`y8|R`s-)X}4!>bKWTH?BN(0n0U)$Xz
zme>&I<S58)ZEaapxZHXbEbNy|0qTN#9ni69uw!bPg|e9!A*u_zG`4_7iU6y0jiNvk
zg#xOt?$$>!u>)-?oUV%^uap(0TcW|Z009&&-J^yw4D(K~y+$HEzz(Y7?GyG}hBJ;R
z+l%B@<e3JO2l#O*6&=iICHUY0OtN=q$qkZt#RoB%Uu~0xH9sazuK5bNzDRbU0E69o
zWOI*j0m!O9Q0cdivF)TMo<&(%hla!nn1>5Z?b1y#n~vTIr-hkzCIeJr3wWac06bxw
z{{X0psnaj+=hS41P5^EX;;(L@4;ahS<i_zdg^cY}=neGP?jj~Lf#Ve9kM?%^Pk8Q=
ze2^(v&-GDS{^EHFx&A&W6LrfS^XGwWE`P|y(40_rcmD0qu(6f=)s4jV(BW_fXm_+1
zALKwku}v12XdSx;Z8A5JC&uC`O;RHE1{6`%85wBd+D0)+6D-bCAXIrJX_6}6Aikcm
z7Pi<?-*i3km2Uwb$2EUr8bV4j2Q)6D2fABg@I29s+@*3Onv95DJ?IASk;Op@s%1*3
zKXg`~6Gv*>`7|Bmi4086-_<mGq*W(wgu4kE{FE`K+Dg!&ZMgcO^!ucnJF?iKC6YN<
z$B-%9THtD=1$T3IB!tGfU&TWm%V!RRH5(+WZywbdq=`~rlK>4elCFkofh2_7WXi=r
z>ZP@_F+|ct#}xaBq;G^{HRel&x7be6ifz`YPZm<@cTc#dZ9~Bvp3dULF{7ckU)&wz
zcjVN!v8++>D@u}&2(7jcr@*fK^25zTeuXF6!-|tmc@pDs2XC62QN)e8a79G^e2*sL
zxicEAvpa4BkE#Fz3##DOhJCE3>{T`250WdPj&`Yu!|1NmZc4uWq~eIuuKmRK9Zfz+
z<8%eo8ZS=vB3-61MO!JyWl$p-DOhHoc9KjZDe<cbEiFTPi;U1V*EWU9B86&|5uRmj
z$8Kv3mp9ROZi)TX5`<7OQ?R#)861I5X0cE=l@I;YcL@~fg~JM}$jl1Bs|Q?<G}dBZ
zz#M(jNh1PKf<CFxz9QJ`k@&B<A0#1wqOk+yO+M|HX$OOjsj`GeUV+bwTgFK+7lV%I
z_#siLMeZQ^V~*;aM%&D3mNj{)MI&3`Ivv$zyn-o#11HT<B}{|}?+IjO$v!DIZa8i;
z$f9Q2@|OdHQA$K)QV?LrmgH52gEq^I4x5UqQg@PZMd|ab6RVIhnrQIIYj=cf1?*6-
zi5ucE%{$-s&@v8BnrfjWfDZ39c><6ZP^xf8nuc`&`S`d005vtdL=5icnQ_dkzmfM*
zvJqT75k>Kja~jR+D}qQLBjU9(K$3ic7!(GlW(-A@dKFaH785c{?AV~3_g;Yz2G0hk
zb}9%&`BWl9u&XEyRFF@4NV1A^PWed|f7A_00xz~?kdMVO2NB7E&P_rhcqMyx4pgfO
z(^=0Q+#_+rky-17_Y^T7BSdMkg_JM26!lLALrL!lqY?iAExvx}=x!r>QLy31bxk2S
z3QzQY=&0f_s*-qa>e{jg+o-yX$Xo)4uex-OxXEl#*5S~fY~@Mnl1XtU)<cSogd$8h
z0y!`bbwRGeW0_he>yuXCrgsC6#R+u*n6jQh>lBVQLbfZ9dPNMt0o_eB(W-8Y?jC44
zZOjV5w>2R_9076(VdAR*`)Qg<#tBhIytrT(W~=6vV_5x=G6yvCBW%e$wHhIBr`;)(
z2W*e3wubU1xd@;y-Da&7kZ*nugGnP#8~cI0_fn8-+DsAHVMYZ<1U9X3R(RB*{7^bo
z<4&lp*sBboKOl3OZK!JtKA+O28^QoT6gT|}!a2x@$hrDzHV0svN#dO$WKj6dYQJtj
zLAEm1&Rav6F60Nt%8UO1uWJ?<0`Y+Gb4|7?>RY;o=HGDt03paHidTw1M7CYn`ukV6
z+Ok}4t7yMet|A~vnXYrKTgh!AmE<3)t9O*JAcKluW)HzPGqe8y`BMy^`X@9+uj*~S
z+(@`4xP+6)Sa3r5sp4=GdGS@tO$Iv1(cIwuL9>y;2OQOy{Xnus1b}PjwzzhW{5%?c
zwx=Xg4{(+qC`U;I7nQT<5B~rtHjD`o)qbVx=aM3@CenP@Er#W!gi1O;RU{Vyr2ywO
zV$(!j>?_rG7rMD|adcT*#(J!u{Q^5|IgmaMMP6zW5ic3$hGlL6Y=c%q8jnD3`!iG9
zi0;dgl1(Ix8TsIk6``KmA_}k@yc(g@wE3e9@wP=cv^pF8k(3$DNG{|nlGyqcR}wK@
z&D}{JQea;seN+cQlV3?G_Q2=vguS<tXwSBHHYx4{+<bm3O9bA_9fxtx6dY3{Wl#SA
zoGp-v?Tm^2bx9S?tb>fi!^LYXu2ug4_0d7=1XQh}T1lBqXaju;SA@?z5tl|nMrR)v
z-$IsX*s~^UV*Z!5c*%vFIjZ^nF**_ifaAfc-Z>*Qw6a|oyRFv}<Dr_3?HUpoHEkQx
z*G>RZocR?!ucj3aZUJH`b~5-zRYo?>)=SBD6*;RP`ctA5W!w3!dg+O7yV5KX=M>pO
zJ8dbNwI@c1+U&&KL9!NHWPwyOd<O&ju9YQ?gR>R{HZfV9UqdS{=!n{AG{&<#x-V;l
z!+q1>xO;gQ<?ub!mo}3`#Y|^!-YOvm_6jK)CuW8>_t9sKfMw)W)|+_OX@+b%4r?W0
zDYu+NNo6%|Wf4fS!;pOA%_`xTwKx=!w$L?0Z*r*3#dNJ7P!hzYTZ-Y!d?f&<3_BID
z^na)l5F)uJ2cuTPUJo;^(n-DX?CR?%BPOC}4BtYlEH6^xFv$FWbXhnV&U02#%~E2`
zI3EuG05rh-)DnjGH<B<sQi%p~Dj-ZFe<0JzA5=QX38y0-Pb5(lGF4oXNyR-<Fui}h
zL6T|v@VKBdl@4fEz@%$_G6hPqAplgW;EsBzBaMKyt7CVH9!LUkSC18Y?WsU~8iL{_
zkc<IPZLKa0=A#OGII3v$n5SdzDE|O@(O;%!Dm)s5MT3!3VjuZagR3)FzL?VQ5XCLJ
z<bS9Np`X&nO(m=e+n?iI2~eYxfN8gCW&i|HNi0W052;NmZ08WkbqwDlhaH1fR`=z`
z(xA0kYx-)-31nPI268J29Q*ECFLb1;ag5e!D&Xv4pSZU~)m<;W2@W&nqSG0eVb=kC
zR9a2mBoNHNpUoSn!R&)PEtB*q8a6^!ihX(^ER4gBD+8^-i7DD~_gm}MmUwps!TPLT
zy=x}(yUkw*a5m$1jN0-}I~8eTxWsn2;AX48^yVOnp3;4;MhF!VX>Sfgqp|$erkymc
zIdyHL!K!4N029dit8EzI<f+)*RnZjD*lt!5O53wWs}_)uII8Pyxg@awebG=zNMO9x
z$bq*GeXvR3bZWllVvtBk;}lu|%Bdm74<d)wZHMg6)5mnlq8TYAc74jZQh5|s_7QC(
zXg+E?a+~%b;+-|myDLY47Z@gx1d19QI-S6ZWQu_cvwXg(%Q1o>5|HACyx#A^kx+s9
zQAV8@oEo-f9{G^+1!wfeBJwqE^IF1?`vpq;jTAB|^#l@p`W`BeRD`(<di$+(s=u}|
zx`CdJW;K#ykVJ$L=BvrjOopbY(vZFl4G=)U5t^{phdD*XSHuX($gFH+bZ^reh{e#0
zAPVfSYHw`S5%FI(dUpO>E0uV&-!<O<08jNvFQ84}fJd6}U)1_3#(wmAJ$IsNV&k#+
zEE3k{*_m2h%k@zmHhaJI?B=cY=o&4usWco{TM!30HRL0a_vG~S+O!D;td>_QuOVSq
zKKwDLXCM#FLj<sw_ap%S0CdZk4dZ1-Bm7X4-1#f-vU&A8857wIIs8&1TP!<cZ;|L!
zQCir>!Ag9^Sy@@ziG9}qeNs0S(PLDo0v#n~oJjH=$F6#-h^*S(Ll@nO9_}36hk^K`
zl^yNb9OEOps(B?UOpRAIL~ex9x<~RVIl-*FHdnVA#J=MdkHD)v8p?Z1+l<Ir9Pw9?
zBDPon58XPF#NOUXZFFU&vMX?_8xX+Oh^&iYf^u<H^6gDWCz`y87Fb6-5lND6Rbg_b
ziajJc%)#3{@F?5ZOAYlBz|dbaF&lgekY5L8k7hq6qm8>jl-qVX-H@QqBvUWS`z5yX
zOm<j7Wh0ubO+jAp#O@dm6(bj8ZQiVPy{Qs=*ugaYN=Z@zB4h3zg&vm>?D=p!Q~ija
z{HGjsnzC6gR?6b5f|A{DRyC4zB$^}l>Tw?3tyA1wgD{IAigd7B032g=2PH0AEPQr=
z+Qi(EB&~tqQ8w{U3jY9)m8j-fT!9vGMI@6)78r6WIX;0aYk+INa4Bav$BG8hW|CY-
z6Bf-{>9+D}S7l^UBU8z(Zkwdrp!mYS-Yd81{*?6*-98b_)cS(J_O4lrYkSz{R%KzD
zGU`(ez8GVQ+1zOWuH_m2>W9?2K{iF#{p$U1q9{jxC(&M?)fQQ(<VJqVb&?{csx_z@
z>HVZ7Mth>IqIe|P&vaDQrI&X^*9CvKE)KM%8?wIA%l6O!8z=6d_Le)H%uma@Ij30v
z0K)y!HJOly@l59{<chmwj<$;!IRxT?d0}!^Gsvl=fWN^<H569K86XdtsRWl`ZprPg
zKWzal0X123s7o^tBOyf<HLSx8C-M5Eh{n=;SW*4ek=nWJR;-RGBQXX82hCOA++oPw
zkw$9{kPm7u&yiHiYb>&5iN$Ee5AI^R4O@^P3W<_@)0W;y;x0~e;+gFTKy1~JKx}s(
zB8;S$L|meR@-QQ32L`F9Qp1tWUx>pNSN{NNt<>$zeRW$gO<f9=6)fN`3C1XxC00-t
zg1Cp=1OtOk)1sA5(ra6a+D*>Ocr4;$?q?2Z@V(MY?QJ`J8b}gFU`q-uZie9)T(x7A
zpZP4IhfPVGNhE}ytkHJX@>`szD@{vVi5x!c3MNl^Bf^oIqEnON!@@b^D@gex0-_4*
zCNg`e9(NIk#wo2X{4hO=`;qctv&J2<(fg>xM>r>{AvUoKoSJ>iTN9oSiWfiwyIjIm
zMmVhMlCrQW20YfP4U%yn;;1$IWlMt4sS0Ymh$VB#Q1`*FDEF7TM2*VgL5iNxB7?YP
zYM>AZn{iOI?W18*I`c%zS83X$Vul%E!0Mq3-3sG;VKMx8t4x#zK%AP*>9ZSIf!l>O
zr-nv!Qn+DPDOw@~jLOpvz$cMLpjT~$bA#EakGLc*6EsW(U3U|M<WaDY>Khkoq1pXZ
zDoH{gk&dca?b+ondL0TFGc34a$JGS>$WrRCI?3)vqiFF}fs$9tj1l6r*RVYCxl@5z
ztAsm_2O!l_RLFWjar8c^v6z&GJ&x*W10+F@)kCx0h2Iz>#V7&<%<qtR@F|HH_e&Fl
zoYZ!}qL#*KLc2@uDeJ{RnoTl6Ew~PH!>Y1~w1*kU9xEvY(T@dA4PNQ@CRD)S{ngNI
zq8AAuTnNWLXltjENiqc_a|Lb(%gq@RMq+*dBvS+x2*=1*gSZ(AHlH*sj_BlX$S0~c
z$a{litO9tWU;))_i(83QbLSOI&JY3^l6WB2rd|n!erhW@eE||P6~;QMq`Z)c0o#x>
znsR%L_&kCC0E!+d9pBqyv=20H0<IJJc{HFU6K#XrcFPW`a_sOT<C+5AH4>6eTe>Zh
zVTM;6^H3#}Gs|Y^N{k-r{@Qe#1x|BV*<NV@EZM~sq-qaoFcW-(b@-->@W^b?yk{&o
zeN>`pUNQ<`4ywN1=iV!p$RqwJr;+1dZ?Jka`$kNj-f?bl!0kS1CA<>ILU`*m4b97Z
zV<d4yPb7f+IQld;hRc~9XMl$rm{m5T?ZN{hVArj$CP_*+H6_FQ=zYP8j}a`Yn(8tF
z{{Rwc4ahP3YBP^NbOrQB-ep(79e!#U<ebSIXuI%yd8+mcWs6)82PFaHituR>NF#Ed
zjSqSzSoXYTGu{ZMEyS%5LU|ojqFHZasLck@?UWzstLsb1?o)bkip1P3fkSTVu}5f^
zvO>X6C`~j`EY{vij*7vuM`EOfVv;;D#s?G##D_n`9~4@ckwOuZPgH|o_t8E^*5mGw
zDuoKl8+UaxL|OT7jY$jv$t3W-P$8ez^y3oykdC>)s+itIVDhDJSzvw`WD$zY>sCc4
zEWF3~@k_Rd%84Wd<R9*wh`W{4;|8FXSl!kjyyuffx3Vy(IvvoVLMC;X0sJxKd8-Ra
z<c=j*Y_Ee<2+RvPIK>o~L7>`OAo-z8Cbvr{%zw$8kNBtDMwul}=KR)6PSk~~ZbD8@
z39SSdVpzkggPLU$77NTfLWLr|fQg16lS2cOhJNX@DUM8++`c~)+Xy;ow?^>=8LC}D
zWVTJ{xhAar*wyoomxE2Xgi9IS$s(r211l`-^9-H2?v_#=z#qn)duGwja)-VRM<h&u
zam5CO2xYg9FtWrPH=I>${-2Ipg7Ci8ZgEFM#ndYgi2I_{guzpvs;$K^rf25#z0U9O
zw+GEb{{T$Av&zaD)4>(5hISap&)BOyJi#mDYO(SSE5?}{zRV<W#|%E}hgUreREaqb
z260>K+gmv>9^8F()mGeke#o$3arH^<G-b_!OpikK6!uHC!DRcbUYY89dB6=O59YBl
z=~pnxmH9`Ws%Y#mLwiiLR^{2l)p~!Fj)eN84mgk!Y4-OkoSsEw^yNsDF%8GvSw(uF
z;GQYdIXQIlhl2kA-OKQJH8gUN3M*$dMwelS2Lh#z<wEY?f+)6%7C9$$T^SlYDL*4#
zGmMSZQ61m8$#I_oy*0?^oMNShP<6p{(<ouI<Wz#?P6cNY_$k3|Dpk}~h6@kwh>Fb!
zyFbJ?6s>j<3uiT)ZALJd4l_gC>gti<++v|48E!6w`{gci%}BT)<YP6Rx4((T<a)A(
z@gD%VsD+#w?6ih8&RA59Y8^`q55++Z(zpt7_^UVsZ-pnCj1X19MSp4pOk09Qa6ep5
zbgxwxjF2;vUrB24DYs$tIIoxg097V4dXinds_|KJu1`_uvfSKF-iq;GS;uk?2^^Zd
ziNC|KpCDEf^!9lt(~LavTUf1~ZOo;xqrBCA4usa((h~8P0H7OsCaQHUE-RSwt(=bN
zO?z1izqgCx2bxVQR$pL3=W`!5BAV#G8<Kn&&2LVY*6o9W2i*^9$N^RZ@N1xI){*K^
z6xq%=YQ<`rbJ#uzJ@;0$(YRD_>Hu70&0c8|5Q799c>1jEuI<<zd8>^-5l+5$cg;Ll
z8Kp_XL$sjXQ`#M)cqDWDXiG^UQ<mc%D5R1(WfEinJXPogh<N0GamG06mgtGBqyP>D
z2GgrL6+?h&w+%d&(zIg$cT7V)E(D=bc4M>mP*04NUp<;raL+I&jkLSsl6~0t`Je%?
zlB3|D;=MJ~x%npAd`lAC6^*f<6#KOj`~qQzbO?J(Uvp4K4U?MIZZ1G6@*HNe+B{n>
z<0B=rTKk8LLI^ugnq(9~bC*D@qvKg$P>q)ZYT~qVIB}nIIPR&n)(sE{21jPApF<eU
zUD%V6RdM9DG2e=|)xzv2Jk@k8sOur*gIMLj$g<Hat|YyT6mXgP;=8}1HrvG#0nZhY
z^v0>8Hi;U{h$aK;n$UWCP`5WgM8j=(zh28c@o}yZ>HRAlalZRH2BCKggoaM&O+qVI
zv;P1X{{SfJr?!p@nEk`^z^eO~#mNi8^@{LtjWgBX2_ruh#qql+@sS#u(&)FW`5Q2z
zquB{kK~idcHr`N$kvJ88Sw>OlMBD5wtdj13a5uJp6&3Zh)J&o$EC+Q2*Ox)cMG46B
zQ^S976bH9r)t_vOlYNrMF@geIbBcluQb|~#$i+^Oyy@;tgfG<>r`y^{r)pt`6qOXY
z>^I~~G7rdqy(Q(!L2mA4+5qOOy-9OEmVvF%uqT>=SeH`Mg1a2J=B&l1c^rRY%4!jE
zVD|cqcM|S}noKonEN5__NTop>R9=$w^|iDiQE+?7tp(1UtTJ(t)&+Vwe^04HiE?sf
z>d|uicrv%wJ9Q1GqfgY;j+*Umquj@I-EQ76;fTWnMcIKRs8Dh-UT-ED;FUhn*I6Mr
zDI$wXS*NnwaO6LlTMZFX@vb-S&)r$cY*tLCW~B;Y8Ia&&hr0Q%A%CK~R<(Jd*&FOD
z(G@0~uCS_HNXH$TF+G{vDA^Sq%IrAtN~GG_2KIv7wWK*dPnwKC(tzstU+$Id1;NaR
z3OcD$(Z~x7ol#VmVX|oF7SZ5<Y6&kxH}N<p>b)ckj0>IE@k<1e%PEMU82nL&)hm38
z<7<OYqAV+HgcSt$TAe>&L?Z`2D+Q$(z2g?+_!+Gogs%*W%1Ko<@3dJ~TrreYBgyJ<
zxMXR&HB#)@MmrTltE!tRRfj$*);q`~hj#D}nyA(;Ab6b;Kuuqgr3Q@fwKQSngnRjH
za%wF{b-V?^&lL`?^FuKz2^r#^fX``{IVZ(@_G!jk^m`Qq;F*%RmiBwmggZ&@v<Wn^
z+2Eg;98(Efd3Kcq8hen(@m9`iRY+^%h-_z>-0oaCr`#lqD3I_)BZf%>sKpPcUPwT|
z=9aEp1yJj|*&Tu&1vWK}a>V4&s_I7Rd#1{-ae*1wKI+}R0po$!Xf8O7a>9}{Skr3`
zMM-H4_QbOt>p^N4s~$-1nI_)UD_jFyhLTc>do^PVgtYb@vfe6+3yCC=qyXS~qU?8E
z%e<43S|^RyB#T`!*8mwugH^h=NXBZ;;03q=xD{Qi<9-vWw-;zmnMF7%I6Tt8%sD2T
zcCurxDl2E+en+b4Rm3}$hDWF|oJWI?y19X&z>{#@RW{}j!7vFld^SW#9OrM<XUe1g
z0-Ge8R+VNELZ!ay!qR4)K72I~8(aIrH9?=cDp+ELeZb&WS?%c|{{U>}$swB50CIE9
zNp7JB%BggFhK2pT+k<`8^ir<i&ZCO^a+RMY9~@`h$QTFB5i==^o<Q<x1kuPo2LN^`
zc_vbQurdus>>|ps60I4@$3m*LpwbbABOTR^x&*l5rMrbBMt(O^%b*zMlXH}-A5B=B
z?OF%|k^=(6@lapDw$hS0&zgRhdo{Jdp54kg?24p}we`7%J!!42<5gII9}!vUk3k5}
znj-#L?(Ul49nRm#{m|wjpy!q}uvEDV72TOtS0^2sztL`$VD{LZ-YSw9+@}MdH6#)k
z<nI_YWL+>C=$yJ?#s}RD?y<4i_ff$zn3M?%WLKIp(g1ik$g6yilt}V&IQ$wO<(*q|
z4h<1(ts{gyVxMwj0HZE{G#_CiwNcv)sU1}|vucst2~mD0u~)G$jg>&$F+yK}B|hl*
zpA;mlhGKPT8vx}$_@Vc*F-7E6#-d<92oy)hKbor%<;d$5Q3+yX5o3X$x*6ml_UDhf
zDrmRct_CP65RZ;Iph4I>i3c@tqE0`9992YNkPP-}c;@!z1xG$<qV}>Hc8_ar-Pq$5
zHJq|4m0(Ews5IcSpgaEnx-I98U1AII9h$z#PU#EA2GAe8)G;FxRa|g$O?KZnbthn^
zEA6;A2M5g*h*%_iZ)1wB)nHj8ES-umoBTv87N*(-kg~Do^FgcNhH41^0Ln?gIOx}i
zkmb{{-5;x32&FE)P;ttF-wH`{-B8ss9=34D{{Xzv&Nd>KZgJICL{Vf6r=0UeNjcsf
z#y}iY0OIc><P0T9zR)>l%{JZAX#g#?O)bELPqrd9flwvpg?fy39+#@UW?7>rcU@K<
z!eoMcZvOybT8k;7o*cyFx7Ax0XoS+H<_Os)l2CDl@!-)4p4@)xwnyKQNeZN{eq8vc
zK_Chr{WxLOEwE>h!a>RNM58JeQabZdMK;@kf#<qzz*cKE0^AoTEId_2F80!`%3~^W
z2a43{ahc>hzlp5Irvt|O6d!Lr)pA})%N<YDa%y(V4aKZT$~a{l)ndoFM1ioPy$eJW
zZUH@0CU*!CR0S1Uid_=IrBz6bxhEqSG{Y=G5u(Up&MF3zHo&tJj>SiBmdV?N@Mxq1
zSEFhI=|a8(rCmq7&6NsrD-UHVz}uO&zIywuew%Qts7w5@nz3{nEJ;n9NgKS-vdJQ6
zZvvX=OSFo29Q0{&91n4^_ePNitghH9pp0{hf?$%P2OsTHT1ZkxMdf^&Nj;${F5e#&
zC^loYJ7E>sKf^sxzT*&AcV0zkwc9~FJJjcInxeN`bhdPN{{SzFt*QeV?q)A+Hz)4A
z()%$O@tn}VENmkuJ=Bs$$$(pNqd*waw#|wN&01(TjSu*ye~W$9Yk;xtEDKWGsGWiq
z+QXdDx*@u0%UiC|{Cw1DD=U2b{nbv7f4>&&**#{g=KzqUvx;<+2|US=uf`~aKj8q6
z=+iBvMu^1A{Tg6NLzORpJXB;OyMx`23<ghCLwyU|%Z6Zo6{Z7t*hVqVXnj)7E319z
zpgQ(`=_H9pWu+O(AJ7#0NjEX~I8jktAXaP~FzisC?1$r$GsP`frK{g;;Xokyr`laK
zbNi8!396-5SW2)t@N3q_c2ZE~SGpBMHukrly@&u;diRPoSR4ru62Dc6(yt0!W->#0
z6{57%8026QXrdMpWyu%-4uyIpRbk}$2B3}o#>+P}LP)K^Uy$yO&{c%IXz}4x3aq|Y
zw*LSwJ~VyP6B5Xu5%}{^Un|^Oh?oKmF2cEDjJritI+HV%AnmD*`nLhN6<dF8EwS1%
zKBxwYV0UK#cR|4!=c2_f(R?r5FlBsm#%X82xpo6R)kYl<JB<BNHs(f6>U!}=;86WH
z7l5hex<mviIjXz;Uh37hv{I4y6?X)iZI)Ji^UBqayQj>GtpYO<^+CbDnHQ#FUJhk_
z>gnN=3lx=Kck6lPDvXQ(?z*e#I!vUQEF&cTD!%*D8a$1WYO9~2Nb$?;(G4CL{{U&2
z(LG5Vv4#==eyaN4Tlo8)=lfJ^=sh|&Ed-@W@ko6Z;2&<`$no`2DWfKTOvgTpIQ>k@
zpoXF?y?qo7{$g?~A**_8RJ2XeNL@$fsK%XkSc$>rmC8nM^mwk$hw4w%`$W@RMA?;w
z0L^htM(MRH$Cvp$jhfQ`0QhZZlbo^q)bRR9$12Sc18M52l$&QuNvFf5i`m(8`e&ud
zYYCFz5UpB`a(FFlm<GmtRC+mEeLO2;a2-`1ux7uUi9QHA0+H|=b9T($w(BFR#52xG
zp{@PM<P5ps)#B+&ta8~D_P4jR0i@(p!h21Sl8T`H9_~3)Yn`k-QCh~EX@6(!vf=T;
zs-q+lfNkR;$C|LSxsgEkmALXnXpEGRY+a)bd1Eb&%S1sRRQm;<N!3CACbn9(m;V4&
zfu1Z00OGOj9mR#%igB^CkM5W8VWUL>+Tk5?NTWTaLOI1(hIF@AC+2#i?6Eu;xFjDR
znzb+m##E~883%(?vbxzz&ISzxuObzWH2?#_scu=s4rG2Y)mpg$myJ|P+3e<)2!gqJ
z0SbDhWZN&e4n}Hu8Co!zCmhm{2647c=x}qzSz5@+76#TY{E6&P7bMAyN4FkoF)Ks{
z3H;FPh4zadw|rT~eAVpAvMJhs=CZmy%gcC!amIPAhRFEYgZD*nG6aq+FSd!6Zt9C!
zhCm-9XceWBW`&EQjyg4**AjV9xns%BDP%(#sIuXM4yv_eQZcgvt}mc;QqH;Ns%QC}
zsM-Z&lH_2EIi5L}=kXP>^tIK)U#GOkY3`}NPjvk!Qq~sJRk<)m#{s`J*wggVeJ$4G
zOL28<!y$*&ay?3M_N5xObv-^Z-lcpR&q-yhkvk-FQR`N$mrSvOMm$rrBPEr&M^dh%
z$fvE`>AGp7$j=q>oY8ADJ=UH4Y^6zki9*el{_57s{Yfy#Ai<$E%WwLvzUT(W{8d%9
zpL1l$vb%22b#BPcHr+9;S*^Mr)t|VBE#{eFcN+lD<Gj;t2sZx!<ZOUE*Rg9PY!h$%
z);dLr+NN41)N@0KMt<pFF~r^%?y7BrTCWes)joLUxm@ljDRv@1(P?|^T|i?B9G|+b
zOK}8=3XeW2Z8|w&Trk`|=<OzZFZzU^x#*4wuBW2KJxbj8MlVs5SX^RgG-V4Cl2?IN
zu3}`s0;@EWZ#u|gY~b-$q7Y#O064E~q|xS#kEm@HK;UCE7LvZyLN+6cxz!pFl5RgC
z;<FK6dv_!g-FXjPN##v<<n(@)r7SHly-MmUR~x%ETd3U6CA%1ul1CLi^{~4E&S?D#
z!b_p;Alz5(xfpT#l2MAbZ<LlO?vb3(W4_Bg-w~dwZBFsyzKRkH2R<umeLE`;0E!ZS
zN=)t~02Un@=|4==-sg<U{X+zwGAbGgOd|t>P{L%jV}qR5m9#7T1;>hAKhrl2c&|F;
z0pwLN^sP&L#)&Ubz?bA_7x!0Egh(3){aB)ni$HQ?Ao)11n-ztfyg&#PV0}<JhwQ?f
z!lt^v{{SE3^EvO<IpRDR4K_dj063{t&)sY^-J7;LftmqhiYXdMp$nfiJTj1SK_<Ie
z9W1o?wn^E})n|@r?nt_x+StNW1Cl&eHhUSMx|N)<Qh2R0UbsLuP@02DD8|p_PpZ<b
zMm>$uUJXaHE!Ad9{w0-9)d^{A%B%q-R+Y4(%HJM-s+!^%?HsXOAUxNS)Ouc{1Y2s+
z-@&fOmF-WGlU78DgE_&f+gr%q^dTgu`m4QG1-gXFMsYw|Si%8iP#9K7$~$>CM&sB;
z{L$<R2pQ_D%s^$BgY{3pxA!m!Ac`UnU|WFy0LV~Oqrgxty2UXez!<2nu%s*_{wecG
z2qjd2YB={scQE6{I$l{k@Hvd;(*)y+s9RAhl#u6|Hs<legvLScrwIvwN~h)ZR>in-
zEQ;%kc;Zcn02G=-D$L3UbshZb8wAotaTHLOQ}e5-xPmW^LNy5YIU=hr;tao$R!Z$G
zKH~#WI;XbmeyHPaE`({B`<2=Xnypw#kc{IrWz?me@#2M#Bjist)P<xSwo47Mjl_kC
z#acr>#LglI3slkEOeW-ID%RHa5rVzdd1EiD85b71dZ)~>ENOKMVJtff5GuG`YBD$Y
zqxgN*rG&oX?YI~su}(6N$+6SE4koeekUVX`trnju+(x|eJE|BqqbVmV<W%o*ws6pV
zSIDoksZsKCL}L-7p7UOXPT!CG(3ZEJ`i01&guxfv3;y)>LK3AV-px};QKJN&>618M
z7XqH(EMRbI5Cc)Riew>IkUOd+R=0Lgyq+sos216n;MEQNyEFTM5I*T-hzTFL7c6)^
zifocIs0uvyN}>1iz>m!$u?}!42tE@y86yk#N&-8R(|8m_mokelYhvv@=gn6%l5P!(
z1TQp(4B|7!D$?nSI<OfvnYWOnf?dQ`g4$7Zip;=qS4G+ZGdsG86l3ocz}cH0jGWV!
zOfiKH<Q?LX(+)`l3~@z$gz=wjpBhr4XB)=iXjt6xLgy51p&$C5DUz}jdgY>{??Ho9
z;Qs)ZlZx1CHezIFQ`qxaOUON!F}OLZr5Yn3Gf3p7a--^z8Al8<bKN-%Vlo%MH3$TZ
zuiYUJN=nI|^VKXRcYwzf=(gwNGyT+TtX$(6s6y{cyhM;9@y|7XGe)rjtNg1G3mBhs
za7Q(&({7gP1Rh0O2Wo&F7y$roGu1eTE)LEXnKH;60How`UWuhv!*(jf5Z23W*Z|Lp
zMMxEi9I&K;m_GIxT9N&{<Y99_gm)HeC9_EWdaSj?ZEq_mPBX!+TrSuu$0I!&gI$6r
zgfvKZfz2e*449c!{{V&$)hXW3mQn$wz{?324W1~wS(+#eZmP=OPjy)V6DB|gLjBRw
zTE-Me#D(!us0}bGzZ#zHo<JAk2S8LHHo+P9ROD6nrY^RYVg?UYNY>jU-tvF7O9M*@
z4-fGkDWd3<Z6cCA=Xw$wp-+M`lkVwHL+XICx)&D9C?6#p(bo4N(EFIrio1dm+y%D}
zy?2Hj8cTR%XhVVW`lv&hquS>L8j>`2k1F78qCqcrdqr`#^Hn;~_iO}f{Fjny<e4Q=
zyB~@k=Gs<V1OOiBRjMJ1C8BH)NyRsOfwPmJsxwxCOKYu|Bp);r&#}TX;P^FI30dZs
zY8Bc(>0jN5`)<Q;V-)6%*J%z2Jkx*<-{i^1Mxr3eH%N-a;H_V1cZI-31eNZp<c=kQ
zTY8Xt9%!8^SfIQ{j1U`+X`=ZdsGdhLs;?QZ9FPNKlif)aiJx$;R}~scT2MgZtU)BV
z7@rILaml7#GJAv*oYdDy!w!QAdjf#FZNzLGj_8n`Epr<y{{Rv=tcIy-Ai8i56|aV4
zv?(OF)m8d}-6e?3szZ+HWQb+%By~lOHUt`EavlRo&zfqZ`-db9Q3e%6_Z$owKnXI%
z?uJZ}j%ivnE4nxN9~9XbZ2~dfdfkX83-TUncqIqvp{=I*4Cl|)Y3;&Fp|^92$4K-4
z03n<X&1pS7aI@P4BnK4HVJvM4Ns36=Uj~jWjk^t!DyHR#lrNFyiMMV40372q)Iwn^
zJiv8|liQOe)cj9$$l_q*kVl%DXJw6&I*ky{`hwLJeZ)Cq-YS@f8xwFJ?OKWB-x~s%
zsvEqdg!^3#aw@i{jRV}y;|Qb>JE6qQ9z<ZB)gbsVW*IHwnXrf4NC!PoR0<@3x;S7t
zJ=K+ztS*(q;YVVtebq9B=;N9pT;wX?R3WQ{m)%lM0i&l16L9b7RV{?^%L;&UR<5Ji
zZLyq_{{S@h8W!4O_U#+4ReCh&VUduP2O^+%k{Ch&0B^c3##U3w_~V)klzHve>H;QE
z2<X;lQH(`1FN)SlBCZ*Ki5_b^>K)QYxey$7MI%-UD^gH#yNq>C&c-JP40xa(SekVO
zh~ppcL_F!WVJtJ?($F@R4=P6p2js{7H5@lS)S^<S6e^VVi^xcS6oykgVjwvj=AuEP
zVWv#C(e6?KUdP=QNo=#@9RXO)9{ziKkbn?6ts6+r18?GrRvHPJ&i??46{oJHG<N3M
zfC&C^%|ifdgYB!2?vgb{FARrq9%$`MlzFz`DQtuHS)Wnbv@?lUgsb@@m2yEFJZ7uz
z*Y{AeF$`9XCQ38#T{1`?1N*1XEVC#0SfMAn68qsc-{X#jJWNpnsT;fIs+1EI!Y{GX
z#j3`jtl)!9TWv~WK*>LJ$u4CKK4g!fMp)lT8(<Tfui5(-*n{v%W25R)^4Y~&4Fw!1
zx#S9$Pt<ZYJCyv<#ECFta=`FtEJ(5#;~s(K(yb$nMY@6aLSF)qh@}cS<BGMODIvxV
zSIq@}Ba{Hck<~WE;IOJPG#DsxRX(Y)H`eQW66QqY;;m+7Osk*jn+v&3fDMi*S|O8;
z?n#kGFjQcQxzpv5VG}<fc&zTB?pohUjy8haVvn$u%wbv1$}nQHLkgLIkUu%<pQxxy
zQW)fsK}Ru-aGzB9<(6wFa-@(kSE3X^26OF_-(%{Z9E}`>20la01p~BGZwIr;qJoK;
zq(%xr=+b~?#FIEGya7oXsgIMADr-jw%p@7esOC_kDhMBS2wCV-TAA?4haFau4ZDi+
z2^EpiHXFsy@+P(hRa|^<$|#_sE4ZYD$L?~bfYoM<wh&=J>ZG~9l1aiiCp`*eyHZfa
z198Zp&`idvQbk}mthLa@xKD_!cCQ*UODIwgfmrK?KJi=}=CaLG3MPkiOs=oAgOQ5*
zOY{rZcDGhbqhHCobr?d%zGsi*K23~unz;1tr6!%I#XJsC$B=6ds|v{v*{@BPWRJBq
zjT#HY_aF)!{Z?OH)2=P;HhXzL)l+_%dfscBb%yRu=NuAisKEC3h#Eu;2=iVySLyh5
zQn$pB>9A_@U{%^VRlUR8VRm!ojdqIdligfmwO*w3x?aj9vjQ?aRu@)+`%1aQ(YYDm
zfyHpOUYVB@waWWCm~~LiRl_ay58B0N>>&9ts;xguD)|=}&w^<zZ)3QW2>~ru7Z!5N
zRjr!@c&=OB4~sWP{EmxGoA+wXHsu~F##?tXDv1U@s2CpV8<^prrr?oR)|R^@Hw}`a
zx;~ld{{YIK>Ha9kuGPE>muw;%5SD)7U-qdc&|{L*_ehvy-C2v61)>}S#{#UZ)y<$X
z0krt9SC31F7vU6+47C|2`7;xxNhOA$5s3&+D`zB;n_Q!1m~aIFb)>~{Igv&v+Y4yz
zqbKC}9%|<od=)lQ%ceBf1Q2rF)^A$TUe@O-q~zB?w107$Tn*h*<*`{1%IX=9ip8k(
z{5g}gPwd%)TbB#r@L}Oa7V4XDYe(t35pM~brfQ<s(&o<a6^}8K;8UUXKeo#Q#=`@#
zT%9hG8Vv4PU-)L+*=MT6mOuSALMCCmeM*tB7*o|!>6+EO$q|B8kKa{iZ!tnaZhRW=
z;?0jVcR|_7$%<IrNm1*I$ZgKaWdZ*HG|LT1CYZ+?hamW>A647PZWdrp-y~H|nR0D4
z6_Fo;d9Hq6P?kMP!sEm8YKD?B!R}enZYEWPx14iSQ%qgXdZ{k%jM1r6!5q{`p_4V%
zIrTym-H)o0FKJi!WYG{tUL&{w1qkybQk4AB1z9u^#_y6aDAj>+m0XH_lyNjg_{J%*
z&GKNqP#4Vz@F<l|02NKGX-zzGw0OsOt4V)w@?;;1_^B=qEZND&8KCL##*M|=If#5n
z_N082;LtaO+P>m=J=Lpims7Z80CUNz4^zn?Mp*D#ycS$aIprAjXReM%D8|X-Q*_(Y
z1xIxi<Vw6y8VvVg2$4$E*O!tRJ*}PjrS{~dMbdQX0+^Ad&UqbE-W!J>BZa3Md4UeY
z0;IaRW<ZKM7^mF)0+<|a8p<MYIrCoCw#N_nPc%$=db0cSIL~5+y4cxF(?zYole0XK
zO>rq_ClxS@+nGV=)rz2n2tI1Q?6bHD$p*Eoj$9#Pt(Q|cTP!FU#Z@ZFJWe{IqJ7YX
zZ{!D63m<K=Ki;c{#EtkU=Cm=E1XVQhqDoP?tuCb?8945<_X`uTEWFoC1g&x{wldn{
zHk3vW2Cgk_ZPmTcHB$^w=YRqGqHN)vu0~w@blDa3=ym%p<Gu+Xg>Zk`j<}vQfHvN0
zJ3D)20N@s)CXO<=DluH?xY*Wlcwz_`zIf`G<%90o#UFVTkuVIrQejy!CI=h}`!VvC
z0*>mz(Xd5d>C-&%f<|%OQ%MYaK#dM)jXr5$lt#de(?y^cSR4LbQ6phe2>AG@7k9V4
zMri1e$!a<$>a1ZSi6g;42R<litvACWb@fCnMP>z?ZF+V-_7|!YLoah>?Pq&%4Mp)_
zp;w<ZpuMs$8xS&0QwVl31mM+80D^t&zStNabsAboazkenbNgf}u)+JNu0AsDOyE!e
zHPY@-6M{RVbi0Cs+hLj(LRAX^jMCe=hE#lprjHDUhSoj5<WY?GQar(r$o~MfEwc$@
z41b9g9mFgc5Hd4Y&_*Ca%oDTXrME0oVHh<8%&{;Y*~#-$#&#k<bifhkG5i>03aQlN
zc9Y-s_X@YVP0|NaGJIE|i+1F7;EGwRK{66FYa@IDgW`u=`=1fTSn9T(#nS`8&P4{T
zfOaoGbwLOwVy7Kfp;;aGbDEKriOhq(s(1-RZU#*P5J<Uj82<phA7MDQkN|j~<_>Un
z)X5tNH?iWGIsvbOX&xnXJHh_|#SsdtHuu2Cbw{Gzy5a=v{nf&&?+8K3?yg^CC%6Jd
z0S*O3id?EcS^iZaJ7o6B82Y9xi?oB#=+p>MShKXAyw{`$1&G2AH4*^v<Tg!6#%97d
z@g2~xlsEPpW-=;*8<AC$XWR?V6{fvKD8M%$aa6LwBYd{t)h$y23~^4sA=iOTGUfI%
zoz*hV#Ws{b4>Z;;3b~7tc%p#MIadG?$gg|EN3)~n^HNU(k|Zm%{ZnQ|W?iIi6jC8!
zqv|FG5wZ=Zio1}jC}_sh=M|HTI=)=;4n=A7#c8d?o_vbB6hsid<gqAK$L5N(R#C_&
z>{C@pP(a2zqY$xxl)xTof((GXq!!vg(?m$K`yiYF{{YPn2ofkf?x`-GOef2u)dCcr
zqJiXtcwR@E%z&)N-Q<p|cY71d4&X-9*sN}=0z+`vq>`pW5hzk43y^%#8_7TndsK0n
z8WlVq$)iYP2Ha%kg8<3kMNr`UzOznN!j67Jo-5c~lzs>lG%O>Xm^Od92!^xMlfBeN
zHaw5NG<<FCN>q;PEumdY71=q!V~p0Y#=r+|Nvn`hasbK@9QiZ@Lm$5J?#6neB4;ce
zJa<su5G40Ue^ml{2uv(ou1KNp&g05qO&GHhyO|Ewrrao+4ZI9g0fxVZG`VtkAN<sK
zXSL62wQH`}4Z=X!{MAzt1Yl=5>{V?LgK<NGz0~r`J%v<E5!n?AGQ09K#}un6iORD7
z0L?}MaWeoT@ZHgRY~Qrh5Gl%@#RkesiE;=%Vx_e)rNX;&nw@e(VJb%=85H9*+h_LF
zmtGH54w*D<g=3V*BBzpI7=a#8byhBh4z*nJemrDQ(ayr@kc^6aa{M=E0~GIIuvtLK
zJ&{JpPaE6&I3RLsH|p>sb@omH9M$}G#h8KPnxyp*PiTiiNHmgGLpbcq2yNMFd0p0G
zV0rUUKEn8X;CQ2b#d~zh?LdF#k$@BtA_A+26*abGW(4iWnr*~o(90U^1OEUtmWA1P
zQ=c^nMb3?J9jt**%W=hN;@ZSKLyt9we|i~__$!`&6|K_bl1P=JImo6&EG1-T`HYd(
zIZAS@a0sa7M|=h$Sa)7Al#n@aR0@q)Nv2gxbqvRDC`EgBfzuht;-tBi<N=5A#Sd!<
z+F6Af;D$=}6ib;Ht_FCac95IMC!P&ldW0rRD)XFGTvHWIthvXEt$;8AHM-@wUli+E
z(lk3q%QXx!nGvp3>>gCmQ?n$$S>}QdMU--x?lu+(YVS^W5VA9F1I=WVp_km=cLCz9
z{VjcqiD8%)Ru~jld=kwKn*?piB@$#p{{R*{ywH$a1rF%g@&Tt==0;|a?LYj}kRm`>
zg%WURMvgek1V1eEM$D1NW(-+kPc>b+%Y?>B^F<KP`hqn{x9!e<@l_?mIBmy{>hskS
zK0UI%R(dyB!74IOidKL=M+=gycA75MNW^OX={caKk8<v27(e)@Dx$&)HcydC`5|X4
zvIQ79Q|7gr6p@=X8LU?JlG#x~Cp?<E^tIWFe`&MdnhAIrGhE>vGEY7;KoJ{1w#YXc
zCh}i#K*$*Spvhg{;pZMHTxeh|ddU_b(Wc@%pn)xHHm=RSYewvi06buhb6KrRa?HxS
zsrstqf>3(C({xF58s&!s)mD`4EP)*DQCiEOlWDIWGI7WESYU1~?ipD~Ak}Hp12%ES
ztg2%<6yq_DDI&%ODz`$pX<_B@j}&#>i6yfr;fI>h7idE@Dib+j@m$oXl4IQPDUz(L
zr0xU*)jUeg5QK_=JQ-LsZswLLWGS{^;o_r35uS*rvx1ud%@PLa`$hT{>ou%rRgqb~
zK-NP=jJ4`7kZWNbyT~$l$BHOqHs!5kAyqcN-3fFGL~-(C#%gH@_nAl^-7*-}Rit*@
zd<q=|%xb>jBn+Rr%3dO@%;54XajY-|g#a<ER-qYdg1_pl`4CV5JIN$<;)sOiMI+)x
z4ndA>r@DJKBpwY=71`N8Nm=HeD90_4fnQDAtWaqSBMdU7e24l|>J-p5h_0>%^v_lF
zq<*lm)+|t45DSh)c`a-srOP~z$?0^`dEpqM#@o~}`~J+QXeR*G%=T8Aji@rhvMxKt
za?N?<w73$ke#L105$danm|40PJy$P7>M_s6`#Ktp6tOMv=r;cV(l(05OEI<ns-o`R
z`%lho%VWqD(W5BqxQ(akojg$#8>3d^$g4FTmxEC4@KuXllOG+NrrP*i-XchhA3YIU
z`&&UKmQxsFyjQgi3ew{f!y2T3=YdmO%F)KLN;Z|^kEiKgmQsr8lUJx&(tb?OyChdh
z1i2eqfm>j}+s1kX3e0+r({;>Kp&jP58eE^avQ%yz2UXJYVtoXng^8Q7ii!ygk(5!2
zmv{ZM40Tak$ru5L7^d7Q7M&i+-V@m7tj_J63<?T22|G#$C(SbU@q}j_Rc>?|A2p;G
zOjb-9&U%YMiBYA50gtLC((-9;8{2-oRk4m)perEkRXHNFMaD|eZaK>wl%jL{S<FDG
z;HmDeG_7wEh^IT0^nR+V)z<$2^!H{#!K{vts9s5bA}noyc+GiVZ$5)F*w058;?`o_
zBes~}yR<5B%EP+G>OPsfH{tE1D;{&26G-(FSxT^8zSLuYO>Jzo2xU=*45y>_UV{Yi
z>!7CBDB|UYT_l`S;ey)R`!TFCHgG$s?c&YG(0(*?*~Ka|h98Oo8J0c6+yJSrmq@|e
zEF@7HVOs+n)_(H_X2H!Bb8z<=cw2HW^2G)de|e~#g(BBPnT#b5J}8?|qITYR%~ROO
zV7MwlAd0fKZ+wuWZ9!$&c`A~&4|QGYYlOGcq+&=rc&TJ56vNz<emSZiRNo}0*jG}x
z=CkT`5y6elMVfq1EYwkfivC~RJ~9g)Y1feZr`_!LQ))0GxOnlOH1-5r02jw~;pZ*V
z+`OK)*40RYHbZaxKq7#U11|2!>V?!Uzi%eba|D_?(c&-$Vlzi$k0e!ALvwj1&8#ZA
zJBE2kWxy0o<3{X4xHTQ2jLWt9tD=%!5pswwZl@r}2{bHL_a(zLgrBND@uC?Kj8OKC
zGCMXtHDq{PB5rgx_F#$rPQa?Az7fh<Q*{}C8{A3qI24Oz98u)p;;)9(xe~;&d9E%!
z%wT;{SAYS6Fl*WYSGg)MF;eTXsBR5b-Q=LkxJ;qc##1?}c~I>q6`^QSQJfwsvh=Kb
zhTf}oDfk6ZN|4O5dx44kRj!|QN%sA}l~)<uN&HSK#?p8)2*&Eula<OAE{jN`x?{ag
z2h|;8?`|f3s=5iTT0ylA+OVE!R^K@0x!DtZ7j&$|J-C)X@^S@JY6xMJI<8ZyxwkCR
zG6RuN&$dWpjISfA`$X~iK2v2o$P;Uf4EU!RMgz7vs`oA?VJ|r9nk68O=OA@d(F^+x
zK&E1DRQWY&4aWm%;C)tEZdBt7#TjjJBu60SXr@av&4~bJ9E$TseWJ_{`&2d;0(e0O
znmQ-IA)7o?a)J#54Wu?mHASpjm5|7Tj_W~%1)OzJT&y$4jkK?j87whnW;|!AGIfeX
z1$baN{M7fhk-~+F`Qn2}SRLW=DL@Z8FpO>-XNr;-9IpMmVu^TKQI^I>RTGK5tQOh@
z5C&+|oQPyS^;YG?WGi!%#b;tDtCHA%6?tn9?k$7+nIf%>tc17G9je3vJYtp@{lx?T
zHu$eD;JY8c6xC9#@M1@iOoB%yJ<faaNNyEDSK$4MDD2*(twD1XGC9D&rm_`U#h{)O
zC~d@YDveQ~P)-g{RkBzR#M>}NIjp9uX(CKrPFFouGeicueZvG}9a9oH79GQykzHLt
zWcdnq{Qm%q{72m=31HnMLN-tV^HEMcrU|%^I~3s|iNBWbsz`w@(@HXjvp^ws0Q<s3
zY%=r3Y3(Izg$_;#7~-<qkq+p%81AfeOYt1xANZ3`2jPIfHk+GyCZtFs!CYkiYF~SL
z5S}P-Bz9&-A+gO^FtNFbu^f4)5jzHMi12IHudwBgSa_trxiWDY;Ew1&(Fj5)lP}3N
z9<^w~-Diw}j{=EAG62dkflB~ql)*R2SaJBM<7fpNMIxwufrEWi2qZ#TiOBvS)t%&@
zu#o`YKp?-OJSgbZWy9|ww{~-ysa=tTGf5?}Vn+HESnl@1;CUvYxRIl9z?zl^Gb?VV
zNC&P80zOA0>Z~+PLS>jjfbBJ1A}Enj26I!vx*3Bj;}l})klkCLk`g50vBgaRSk$W#
z$i^zOM!IXI0QjP=L0N+VR|J-Lk{}oiiieg>!vqhymSFN*?qBrwD0s<e$o@F$r4Uf<
zC8NgJQ<}``(IjuX1>4PNGnF8YYBOgX*6#lR>Kuw$A{i2{CI@Ln9N^N!aDBq6eD_3Z
zQWtD8@;J>`fbp;x_><it<N<T$VsJ)jv4bMCMN|>yp?BEhVa-VkbG?@U)aZk)H^lfY
z@dvuuX?N(>qb<F~V-S$+SY|(}x%As6@&g`vT4cy+0~cM&N@u!aBbfrX$bHk`43WO^
zkVh2TdBkLMpT#-}JH{a)S+c$bM(oBw7-4#(xwziwO7gVJiJit3cr^$?sX-K|$=c$v
z*LLPPnRCW?Cbq1mJ5or=Ca8Tu3Sn3d`=yZzOK2Fm+Cax7)Kjn$7GsbotAJx*5cVoO
zM4Nr(#zifVYB?&QPsWx)SnX5(;=Kq`3C0xlQjLf<?eUz{uoj-3F7k+*Kkr(h0aL&Q
zM~cVkGE8PhL(##liDyY;ec9k-QzjHeEP=ivJ)`QTwwR)b$<BVNE6bAi7-)iIc&}R!
zuB0#ZMRX98HU<}iRXUU}I=(P|>fda8x3|cD@m`(?j7CHofBm#NA<k}M33OAEL8-}i
zgJO*1#an6+z%EG%hdyX?7==B^j{N@sDy^^w%VBJNHT#s8GXsr))h)6X_=B{L#TjQQ
zM)<Da$N1Dbq)aZ0NANyrQ>GF<*_YZa$L6&<48d)-t^pO2jv!@HPZ{p6bepvof*f)8
zOuHef=0m;5UE-o={zC^CqF`VlZivMl>A3A~7U3>{7*n*GE>*}%MBH$Q3vp9G?2)><
zj1lIEK(?;=O}zX~4=@sfLvvE>g&wsGZ*Ez#4tT{-N{hGL&I%l3?zLKk4AGFop^;gO
zpt8&qpi&PsC)fz*x{xZOj4AIFL`Kd?$d@_W#%Z(KvLA5aMm{ty(<AV3JkpSsC}Io>
zjHv9?_9iIx5Rnf2(C+OBDH`QkiPDgM;Ys1U#T3YEUf?PuAx22<qLA)v^|$Jy6GtWk
zU=hY?tlK2Oekrbk5o}z-&z`y!1-#NrD=Q{HB9YvN+Kq~JyKM|h0Dh^HAyKT`Ng}&D
zd`FtfMJ#Y!vbQ|(TRmLn-a;E6ipG_SLfJntLO^7fj3iOF;!o8v7u!&v{EaUgR1cB!
zPh}!SQQwMHzz4+gug8H_+CvELkr{z4=BbK1<dc>aW1=tp^kIhPqKIoq4#5MSD3+cE
zO@TmgIsDX8!TuoQ;%QdhWI*8Ig)>NNJqXG&jFNg4U#X_R<8BTp^^REX6qead)WX~i
z#B#l#x~fqa%}13v#s+GO)Yfkq_s|j=v=>A}bGA+a>YI46+}pG{<c>J3^Hd(p=7S)O
z0pPLh8oJ6GXUhDKHJSFF+E-U%jHiKC)>D~b+U<i+2@1fOStKmS3z}+%6W|TR^@?@V
zZk(=uAlI_-$g-y!Y3x#K+jmtA!1F`Lx+A>ck>ZJaiKm1!H^+8Jrrb*bVnXxvM1gur
zv)&}h!Ou0M_aqYX#AdTPCBgmDE-{mVTL~<m+YIl5d7^@WSSqtKa!m-5HXkEDRA^w6
zh}dqS+-#pDZCvwEB_68Z43#^DV>Kxo_e}hQT_at!R)l+C4a=J6n!qYdU}f{ksxov!
zwnSp#%TjGV;O9A}fbGU{PKfPqmpC;*#;4Pk=Hby!F`qTreHziisHjK8R}|3i3~UJa
zis^od)T0t8XBa=4@*mVVCpb2GUr5Uo=-kbyt;NHn@sg^1)wiUlvxi!V<2cFYp@nSZ
zxAy`DBxb4BQ62v1M$8)VI((ko)RO4+8B)mxj_(uO%AwVi=jxI|G6Cwbo|M$17VGX!
zm(L)d6|ncAomJxGV!fsu%QiJMd7O}V=e3VdZDJ&_$fQ_fxc)#N%>j3LZ5w-1xL^0E
z+fuhg*$m(uebC%31YcO8_;TM=EV0dI$vS5r>J(;~dG7<b=N@R5Pjnpokk!@-8}Ycd
z_aivS<WNwfd%Hi=Q`;x)jEVk})k|#{_RNfNO5H9&Oz@8bgONh&%?x%=BOXBNh!cgA
z+sE?g&>o?HJ)16Z#%ipI(`aZ`<z^}^Tv=)hFk%#*MQ-%%P7A0N)ydpC70hpKmT5v~
zkk!zAGiMZ5u>vqa9M>z-b7Yg0CE)6MhCFdmlzRj9mAc7hT&EfA);b8Fh!&3lyqenn
zpj|3hdkS(*bFbW_F_O)bSv_l<sV}2PrY7;Qi`e42QY7qkR$4x#C7dD_B&g`sMV-tN
ze2zb=HQ5SfNUlyS`EbhM=--zVB(}*KSp8E35$$&-sqZyGdm%AO$Qa`ps(q0M-6G`E
zqKY?}WkY}uRnz|fRLPmf4}%KO<HnqlI<AXotLgLrWU$FUbUBGputqUf8U@sHXz-{Y
zZAu2|gUIJR_^(rqd{O7*-Hn>h-%arx5$1z7_wj!QIQy%cB`a!FoD<Cx5~zpdpqg=8
z<fUC9?vP0Hwnyf%zM(^>>Ym2^;j5qtnkk4s6h12t>k$)b%FD}FH`Ou0>E$N5M(<3U
z_mq4xgHx3k7$kpm`+Kn^sYU0G>2+&iBpAm(bOnpY3j1UESC32MXQlYK(kbt3KYMt0
zxl#=uEwO^z+Ru*aTg4GLlnRb(#hM_v`BsHFNg}uUEA;3R7b`0k$?VZLUdjxh6tLSd
zT)5}O4|0Sy(@lGwvVMrRyo4|uQeIvN<G=%ts*#dbLa;RiVJJ_%$){BN3SVcTk!M20
z;L~Jg0OavZkg5X1#Q`+(%66zYtK$oN3Bhq-sS5kb*fd4kr10(sHJ$?GNU9v{J=OeH
z=k`Rec%;b%7j`rGb_j{GGI*+8L6_cdHELaq5-I*(Dwb#^_X%^&Xu`R+Epk`1(EFQD
z28g$XOp59S3#eNmIXpLmOtAgPkSj3?D|K9LFedEN!>LDXp&ftXj)qmcOs`c{1TZ=R
z<Q6q$V*^|Kf%u+<bFwJc9hTQ70?I%e+){TA#W~%QV))^^sM+IGGJpdg0+|)Sk+!(S
zeYdma1wObja3n<|J=J?j8BdTqyjAAA?fYAX4l0cyS>(#(3e6=<ERrd9$Im~Cnj4nr
zaKD-n<1x8bKh-3OGR3<b8d4WlSMo%1TLQ22`^ct?A@YB^!N((isGQN(R}T>kgwafv
zZ&ARDutD)sWEl7WVyiT%mUxtdk~^yqfs+~Hi{L4MY7o4jspmCLBbwdFZ{y~)VmuR;
z$?-y8T1ew0Qnlngnfa$ca)jojfQCDnSmwPvD5Zm@F;NS!sqLzsC<;p|q>qJM8Y0f<
z+7LGqxxo}9QM`etR{=+gjf=>*$DZh>(G6P39m*@C0ryfa$jJ;vXY`4TZL&f_6Wv@&
z;{f9-YQ+ACiQZ`Z50~9HV#;#8`KTn3cKlZ~L>~yZVD(Ib4#;<qt%2r*yRqFH9Opez
zE=dePH0yIAJ4gf{6ljJ*;v;V{&n=#*qilygAF909H1ogRU*tO#NhEF#@GwPFR0G>V
z2Xc{5k+BL#jt_MYxJ!aTBghpqFxt<z<KlxL`9h7V2GRMaLx4^=Bamt|3fKUaWA{(G
zbs#AlSaHQb8XXS%-axEy%l`nHwT@7d7G6UPR#!wT1mZ^|k0P}(%NcBi;e}kCpoA*M
z*dS#2#X2NOcajF*{wvn3h)Dw`k!4XM1><*OfJkmz<Rgs;$C{c5wx})%KZ<_glDk1_
zlG@-zPE<R6QA~w)yKI*bf)wOdW_4YgLY?Y*t-BPF!^kijz^wMWV6n_(i#(qclKUZ4
zkjWydNI4@Sn=kmvga?Dq=B0?sxUO=;%|`MSl~uqaJ}L?m?x}$7PI27_Y|ZZNbznZt
zA7?6i+jfn=nu_STmB>}w_9}Ej)6=(&9odYG1JSKBp_k<$z^p}?Q5Yeca-5pdS<fV}
zDQC*#in1@jQDUQOVF?Opj~bQ<yPvUMf&T!<wDbMc-?mvHX*b}CauL0#5-!&mJ(_<B
zz7RJ9<WpiMHyh4IJkmL0=2*eN@9L+L8NGJeXxN>fAG}o|Ewrw1N#IuFQ&TRTAtNUh
zin_I#8DtC%R@E{xIN_Mdk}--UU1N=v(6b(?tGE#E=Xcc+?xx(ju1x?C+ZByV$fW1R
z9brDpsN{<agZ}_DBr@hur80+#n4pSMq+=Z#7s(Bhdy>K~8GutzO0hGkP7Zmh%^LBf
zxBa^@I6PI9O&%Ff@~c-t3ct5Q6l{m&Y8E>o-5|m0y%Cw?=f{eCj*~;R$2|%p7A#K|
z8HN|Cuhrp>OKBHr1PVPA!45Iq4=V3QS3f2cA0aVPLi@%aka{)df=Q*z9IE$5UBK~6
zBy2f7H3)YIc9y{h$C|7ZF(Yv{(fn$60ye-IrU}>)yZ|ZbA{fazEKNcaTTanslYyMp
zqf59qs3a$xVzLNGQn?)USKgg6A~Z~PpA^ZExRe!}8-spmgbMKP3^xxH3be7wouG<7
z&)Ouhz-Al@BMfPr6;xjQRIVpPDn}=wQ6#M#z`Bfvspq&e9sJ~dQnD0!ylx^cV=CWu
zL+{J&ounwwRjJl}+nW{u5Jy#$n`i(G{CmwKBM6AJQG21r?*gNRp5ju|Jw)tC2mLf{
zy{st^kTE&$G%N%`)n6FTJyACECB58dX3*bsEx9i{Y(dA9OnLKl@&$^Ti?SPNAKY!q
zNL+PBTb6usLg$=SW|c3$98ur`e9_4R!68W`;2exogf{h}*dxw<s3%R13!XgGa}y-P
zCjy=wSP0HopEVMq5nfEvhK42a-DB@9W=n@9@y%^WM3584c&yg2*u7|q@J%e4D6Ky6
zOvx@#bxnduqd9T79;nv#ZQ{g;O8#gWo;5qdWqeZD4+&#88EzC&Fi4VKDn{5XRhJ;j
zGa4WEq|?Y-wL8vK&<U$m9{5c1J2XQSXWFBh7AA<{@^X8kqjXmTZb|c2SVe_G&Hhev
zQAo@ClafM;C1hD9IZy)BHz>aD=3E2S6hLiVpnErU;DJ~v#ECb@Jeu5U@fERl09HN7
zEtxpy#Yqas$C9B~?ea}42X|JC<cdPM0aWKbRMx|4wm51Q0$YEWVIVmJ#Xn7yL4K^+
z<nclN<)fP;p9YDulis~dpvT=38)+pH#-u+(s+DCdQ4rt`Nvg<EOkt8ipCdGFllyUG
zuHsHA&H_z$avo6{sn6X<As}J9^HMg=g@D6S4YIBVGCt{DP{^RyiFp*V;~g3vE1`u_
zn8%ts)UY&ej9ZV*XWnNs{IC>tR7y4yeR{=i;uk6~M|iCLysW!|p17?=q^4Mo;tF&{
zQEPTTx0K5cLN!vpEQ}RpkgH*MCzhjNMQFXZKO@ywTFA=;h)Cr5rG&YV9mcFoLe?}1
z2_=Cb;}zyqlme-YR5pyiC47>6nwBEa&xRnO{{Y1hgjqt7Tp<IT_gdJcW`Gh&z^uFy
zL~o;1$pqGsHpU9BPabKa!ias(^6m?R`J@pUTsa^d^G^UUXCUq!3S+U{2*^KG84BHE
z;iF?7bM;t_X%D_#2lrboP9}2FzhkP%YORS1oby%W=!m3P*_n=UQMLyIIjL?phByZw
zijESX{QTBnhS~d)DIu|1-$sWws1`y;o@*s$rz9!()|1h*Ew%2}amnK~gVbW)2{dld
zNi3?)`|0^2{fGHa0C!b)(L3C{mgj>*=<~e#aey2X(G@BSt0eybAjtgJ%O@SVAKCBl
za|<1(Y7*JcnH;GV(me(07_VhE?8A48<=0V*(YbhI^;2k)+-W+5a;ago*GEs8;9Hab
z0K+bPG0SPLPPOXo8sham`yzLb0<$*SR<NJsmN^tD!L4qA>a9NJ1QR)qOkn4#(MI<7
z5pGE%kx$XCgz#Y0Rz%Kn&65h)(L?F@W`^8`$qYZ@t>FE@hiZ-i%}&<;01towekrm0
zsx|{%2(;0W^1-B?TOvinby3MI(ZpIY@uKC4nF!jX)JR;QjgyLcupySxB=-BHTrujX
zy<>cAZ7|XSyNc4Z3L(nzip_e<(~@g3qcq_ZeHy1UCnz-@$HL;Kw{&CenPrp}`5f0q
z(9}g_CUbxn%~0qbnS#tJE13_HYHwfb3!`Ztwu}D&RmM21nk_5RMpBzJ=gItusKCy8
z*HuaG-)jOFfmmDhx{@~bc_xmxoA(g2%yQ)UuSTsC4tTBxe9n1X+B?`VtT9nck4dwZ
z7;`iHyP*F7e|(y+?e@2}PX?~t#?CpRK;HQ&gZES{mfL8(Ut#gVGq1_UF-}m>jiBb4
z9FB>O>eofmZ!a!VS(9u)=LgM97&2y^MHFVu9C5ibS?OnxX<)uV@G8zbN1Xgy1k_N$
z?Sw@b0CiS&sPTZq9M`wS2M8#$%FbJ)+8u3aag|996nUwq{{R6Y&IjE#;!Ce<c0d(V
ze>Bq#h57#K!jZa+ft>7ayw^U~W@XeGNgNy=D`9)KJ4qP11x0edP{}hHFzuFZE0OCC
z@u**H>v}{#+-%Y<%3MTtur*mD!xh)eb5+_5Q$+}7l#+ZJk5ago-X8<O?!3y!@@X@@
zvYBtIO&GK+i@S=(#dCEv@d$-fA2q234Ui<{kwC)HMov&<`Kz$WMKwiRTv-*>^w#hx
zCvmB*_f2o^9tjl{+d{(xQ*G`_Ce6HfqWa+sqvel<+TV9M28L(-%;aPWZN<x%+7lrC
zQ0~DFNx`cljrJ>c$>)kRBo+puv`Cgru=vnYn~7uvbGD2lWMg*~Zq}D#34gS482+f+
z8;2I+Cm1-YnQj$WdBqTuOLHQ7KmaI99j-@C)e_94&-4#;2B)w-M~bkSnGWX_RjWSe
z_bomo+d?}|ihJc}lro+LMlOmtO`QGIvcLHEVxd_eZ;?MQs_4aW&}}AJSVJ_E2@BwT
zRi)qVzi)(m8ltzpnm5Ezl0T}@=@YHc#M}}0Su-g|<UQX9I*0<xxxCR5#!<tC1XViC
zBMhG&D#G4lA_Z)Z?!L~M@}dn*5#ugb6-NZG9mp(jnXMeL$0qH~z!jUA2a(h;8%<}(
zsgPTjN0Ex2bHz2Llq#ugfnHS#H#aod82|ttJk@A}fd%(4q7;;bvh+<3>SSH$r)ch<
zCf{IfY<-G^E_6%h1fp^GT1#n|;FibYu^K5Kx*JKYTwdZh-B-a{A#Tz{vFCTl(k!9y
zH|0fE%O?jQ(`}5Kv4ggQha*`wsb!qH436tK;Z|~|gIgW>gaOVws{5Tb7h^h+`4vo~
zv^|tEY%$^^f;*_mks|9NjFQ)Dz6J#gB!k?;DZt=R7Kv?cfEbZ8^eWEMc`&Ib`wFLV
zwSyJ@s#{s5Yqe)QXNqXLWa!c`_S7(3`8Df%l$7l!Xg+FdLGQ)>Gq_`#jyN_KA$;*y
zE=U@-k(07HhB3*Z0gf5S2XO}(uS**$1GtR-X&|@wl6mu*h=g~8+-$P&eAaJJw`+Tb
z1pH4`riw>m(QVEs4OZGqxQJ{5of=850%xt=RzrmEgG5X1sf=yP=j1EUBSMVC0?K^T
zt=e0bVH|mo)nEwJ=dzMGjhsPt{{SQRK}ofkGbVHY0E%dcq=j5@pO!pP)~Nx=P*`Gz
zL<EjGSQ8n*6?dV|&zNL<kySkVtf8=@%^PUv+uocIc|UZ~l@QhG$Rbt@H*V<Hg}C=J
z%R$)PG^&8`G2K~MNTrz{oO!En$p?r7AA#TwY4*_Dhuoua$5imKv2CP*iV{=p5K!Re
zf(Utm89$EPRX(w9&jAW>NB(Nw3ysk+INgc`)<Wf9m1>O=%yfjIKj|aQGzbGJ54rrB
zx7IZBB*G9Ck3Wj9xU)-Q7GfA5E8>#DL@gK+7Ckqbe2fwU8uGqrlO#?+jmQJ+)9u4B
zF!0&)L<C7NNQgfWd#g<b<)oNEfR(D(U5wFf&MIvZ=1AXY0Z#7dr%u9b<+pHvss8KH
zoJ2u1>o;j2$T{Ph8b**i#>4unQ3aK@9LfA~L|I%RR*eQ!_fRA)C8^wU-(OUkdV70=
z5D22e5hxP^h%laOH>}v~+}YXztNFJ^J`O+Hf!5<`EPc`mKSGj3GTA$@5aa&<bs`Y5
zf0Sm3D|Z@XMe#mJs6X`_D-1Ot0#%sAOli@lG>^A?j!iN#Dk(_CDr|3zx!fucCdzRU
zSz{O^;MR^64yy5w<T<R!Edz#N0)1AGM!Ak?#s~u)Q$>)OM9IREG2J_35`hOSd!rUr
zj0F+yG(5K@-}r0_Y5atB$~0slIbl%Au#j1K`=-oHj9VlcY)DIiD`9E`qxB?GEWwau
z8mEFe8Q6ii{Z`Xbf(wLV94S@vSlLuU?7)(IVyk_CVjz-640e%Ek|l~f2{Lol298>M
zAUll}HM5se&7Zt@ph5c^**TAbbDFZ!Z=(`~b<TJP>Z@mS8U;*|PPKFUgl5Lk*c1Y7
z3`g5mE%7wyV#LF5REp0!LJ0yrxfNq#u|)gBu5sp`=~zK;p$N+(9OjvAD#a-qKbKW9
z#`2ajoPvHNbxpcE6h|J?z;bD9gqJF}Hv1#^u<!F&{YuO&(S|nNBX~8hwPsaWF@ss(
zQOqU1GX?h8am6bkRRye&Ge;_JK1bC!ERxQU#@Q9<u31BT!!aIdwgKZ~Gw1O^0*foF
z58B(QNdr0lLaUSD?&dtNlU_FxNjo?!SI;%?d?s+YQ&E1%Y4lt*qRr7?6>2}q1CBra
zwVm|r?9sez2nMa9o=Dka7~7tIs%zjRycrvC=fyfQBW}4gjU+*vZ#gvgU9seMQ*dY=
z`a$j~#{ktfsOZ{^Zo}b<xVRjCe>Fv{9r)SH{nE+sKs`ogW+Acd_@PXhxAzF(a0N$k
zye`&mM~am&*5MR_dm@v-2vXtD{7y1Drojnm#?`^jO-DJ}#GuYQG=7q;J=APR#2*yN
z0JT&x#GsB%8vu|g3b`ZA86;NT&|;@{IH+Qkmn*lAH0UED9@gU{a@7Mf%P5JT2sx)-
zHu3~N@M=hHwt!pt8e~9SXcg03gOQF6Vh9S~CeBT7b@=w_uZ9~j$gCsBleK@2eA2cn
z8*J<*IOGZ`Se%jOpq4$0n9np`l>m8tx!fjtCZ&Lm=Fmo?<=;7}^sqmB8nbbVi&1of
z>)U4c6m&;bC7W)YPXmrBUja3eTEqKs?7t?DvQrcrTzqleQX@!VPzlKHi-zW9Rb1qL
zYSm;dSz>S%8|8`(BVi%j<O5Gx0x6ImmMO72$Ttzsih(M<Q6ozls}JGTWo4In+C~Vi
zcCy3U+~X#*Qp7}tQlO6ls*)h<Cjf;fim{g8_g2h@1OdfVMo4YJcH{1fv$&L&DExU9
zOwh@ho8RzG@h~F>pJ^}p$3Kd-*RB27efZ%&{l=*wNfik~G3JplG#?a_Jj=(xnl4-=
z#!1iB3k}3+5KyOg-73qt?s3nGdjNfepZ;@?i~ws?jAvtmw1ZhKDdTNH9$tayR>C`&
zGlLo1;}pq}50Y()7jHlNscmg8E(<m?0M2STXGb9d@Oq-s`b#A~(vTQn=A-=#Q{5<e
zT{iVtzfi0}*+TMv)mq&`Qu7y93!dvW>L|dBIUs{eE`ViTM+`?`RDTyzagJ(hh6TPo
zR11x#J$+Vo5^F~)`OhC!V+m!DTqz*0HB|u~>W|G_+ynbUs62KmG^DAG7f#NN=pB8h
zc;h^tjdV>^87%~ZkOgs%(`NP`apdN@=)G67Slj(o^FLhEM;4{*N4oU9(#wi2Os})N
z5Y5*WVzZ?3tKb^0y0(Z$Sr0W^9qh2Vk7@eNbCxar9T_Cun()neJa)FqNhgv#S55Sm
zp7s_i6G&X1PES>t{XVgUbo9Ak8dv`S#dJ$37C6U}YrCM*EHK?J*~QlCJ;`m4NK+%O
zFi5EbvnN0EOqI4a7-E9E)L^uT$t-~sc_z0smnJ31bI<LrO4C;2DF7h<0OGQ`U#Vc#
z?p#3Nsy^TDwAO`v-K9p>>Y5JKqDn3<nK)onhB*`^;syah{{U>!(%G)kRGzr5bL%fv
zzo&H(45-WxAk>uhD`Ce6k<@!P(THJ(+ypMs&TEr?qH1fX>dcWg<w~d4{{U02t`x$*
z;*Gc_siC?{f@TtKEy=E4w^MN1j;51II8StgEkhwJ$3Kc9NrNHq1ye&gTau-?G;294
z$49#1$~@aT(~l!Reyee_-Twe;xU{&2Oer}gsaH*f?iOGPtskd!$gg!6;|f4h4K#4a
zEYxG<8FMH?--Az~XpzXPaWLb;)~4Fn#8)5%@F^{!_FW@o+Mr-nqoiSX0yFhqt4*ec
z2yKcy&TP`k?o3bcs%4H1FiwF<7$$?ekS<<!*rTe<`h#4JVrcE(<@OC(moza)blnx>
z#Vn4Zi@jf2Fve9|aXtk_XpcR=?gQn|im3Fc7dFOQkC`XUXoZu&yY1U3dBqq{dQwD`
zHoEd>{Xy=FXUWGEgTINNCmXAyYtTK#v$G6s9M>@yz+K3I<SSyhzOxB887_{`qse@i
zkoyEt5Zo}|RLjIxQ@8+4R~=$Cz7T~uG;PHH03VRWaCS;A+0@9b+5@x_0?O_22a1OF
zMNGCp0;ji?9hqvA)NowPh8uPx^ICDmN__BDJ=Pm-JXZ~t#ZMzej4m@(8eN38*2COl
z%}WGxi=tir6=bo+H%w<M4-O(dWSUEuV{{UEqFxxikog4AGoIIX6p7p54v2)sWmQ$&
zDr?j3!@$ACc_p`oK1M$#hik}WeTv*4HDb2yA!LDV?U{=P{{Wgk&k#(Dp6OjgY6>=U
zPyq7IrF#_aPDGDoj;fo&)m*SEAs>pgmQf^o5Ap)5WjjI5CN|dqx@A<b0vQEXt)y|z
zNj(1mbzyfXRyeBfVp*o#UkhCvZP#FL$b2^ljw1kn#azWDw08UYHCJJ09^vHG%D|#9
zvL_XrG^2DpXY_K5bS@Wp+I-cOypV{BHzagwsz_OWMoiT3d~iX@ud~IUEDDlD<OMl3
zoxF%8({9TB)wJ@=E>vTRuGcrcz+f@QHBLpvkPqC!AY_X4g-Y-_uPGP{w<ez^6?u;t
zs>l_mOuo^CoEnKhNmww*s3TQBA;A>l&ctWM0u$H=Yrqif8Lfn6f(MZB1~XX1Cn3XN
ziqq)Du5EE7y;a!y0BIqx+2W@_<#N3WA1c4&CyJH}ov2gltqq0R&9X*~faG^m#~LFZ
zj_Ls<LJ2E_;MB3o8AcnCL9en{&0#&5!-O2wCAhbow(?Z?tLxTzElRM<4y!iIkjUGI
z$oQ<%jWD(uBvHi<UAKKxZJ=g}pW7@4kUFSx(x6p58nCvymc|x=7;JHdG#Vi76Hqp@
z7>;(Dxs&{@y;}qES-Zfo0MVS2&T7X(zxNl}afYiP?41QmERGC*2i;8;_6i3I2Q=%5
zfneCcH8cs%2LMy1LJNiC{0haq;8Rfvhjf6i-fI5<(>j%vhNcbOptw`u)V{OS^o#Sh
z!pK|Wym_Q@cT^LdYEisQ;4i_Bb3<C0VIJf!w2Hgb9w}`aJYk1boFRZX80M9#U}00p
zCSQodALK<x6mo|Fa;xOmtym<hxaB;GJ*4-M(i{b)028QJa?QtqQ^j}f!eR-*q1xTn
zJ<^;L)j0{r$mEWy1ijI&Hz-yIBdWfELMUQD&v>kjoX;DpXOA_p)8me7Au_f|{zK}l
zizSbhghS)V?9_1qGw*NX{{Y2G401SB{yiFeaHwWgY*P?hQEp_)u{iK*Q5coW^aqLp
zHZjgWx-^#~;@|C1fEvUJJR~V0hdfqha$n<uK;Y)KS1B4>@89w~5m~KG1)x71@1$$U
zO1Fh~4ZdlT0|Nk$kFqJ#WI$g#e-#$MVprds_fZNFZ8&9QATu8(j*8Y7v-h`!l~3^%
zQlU|@t%7K{&?H$_JCB-;5We*F(OFD#KOa?PJdv3XAbx8rr=-H^WGr_5)|C57xOCmd
zYQ(}95RMWY64@Nk*JYk>aaRaw$su^g7XWord|nVT0s5yz8)-ZU<Vzk@jx$kSNF;_x
zqfyUP@oOuBTa^^}k^qE;%XNxcOeTKnX_nAJRD5cool%SJCBHSJ*DaY*Wr=t_)kVU=
zCZE4>Su>5bRc&Bpd~(WPaBdG`iAGT&V7VWf9t03XUC+zFr+fbDvMS*C{Lvvi+uA@$
z+`ZzgbbbE#3NWX@s#r@bggcbLqO5Nk3)Eow28!X3+9dl5h8QEdXxi?;^m;Ta+^mDF
zwmYvR<PD9$`>O(nJ-mwLnX<X!n3pmB<7|T`>WPL#l?X$-9ncpMF}&T@Dg=`HJ>={M
zBz5?#rl$nZN=O`Lw#v$q0^j><Kk7h4fs|!Wbdu<XdK59UPTU-EL?cOoagtQ`O-qF=
zV=?ahQ8P;!M2%zGr^Qx8hzzSI2nXh+jg)h@s*d8<_VTMEAD=ySMF>l_SL>>lgGP_j
z*COT|NTgt3QAJ%GpY)FEpGQlmBnnsnc>=CTcke%*DbutXENrEUTW@U-`3Hkh+*!0_
zJ0FqpOKWaa?u{FF-YUcPmzNuJ86*xks-==K?T)d^T_K4aBHUz;aP%sN)d>xzBzVJ?
z7_Be;9{yZ1#6wrF{V#c_T8QOMti2k~JysRmk!*CiU-q+{#BE@er)csSe!4Uxoy>(8
z@_&tOXY?MeBn=Q8XNrp7^z&C%4V)3xSN#1t{^9=sgP;DUM{594MhT}Fo7z_3Z9Z#a
z{({xap|El(ZaoL9I)4Hxe?JHPiToV@0QEDL6aN6*Dz60AiFTw4qykCo)K;E})*_W0
z!gxK^z0arhKV%_*mOg68>oC5gg?dcU?j><4{3Bey-6P0LuGY@p^GC;}UD>D-LfgLa
zRrj#EyZ-=*l%K_09I=;7sp69D1WPTuM}j#bvpTXE?gKB61I=9_ju5EXEPJX=c$K<A
z8OoYwB>*#i;g0ygW7R_O%#!R?3US>NDr|A{>ym04o0B79R~Ym6N&*G|os2Q}RvIF*
z&ymofZIr%CoUI+Cx~;<ynNd!FY3<%NJ6Hq8b5P28EEnK>)K+sUmS!9pSCQD2h<puL
zkVeB8mS9K6nwIVtQ~Zt(8K|P&y;Po!Pje}Kv2N!%6hKw$DpthA%t=3Wlp%kJ<oT_p
zzwDhc%!d+z#bV+hTx?^rK4?D0VmxRu$u0FtSZ>RP$oiroiA;>n{4YoDkj)->1DXJ$
zx@Bo3z|S1>MrmsyoNip>tW$1SF$%l`=84k@ieRd8O-l_LD?nsUfTQ5HR&0^bpks<i
z!ajeMSR=kdsoF8kSf2r*mxds!26Iv*V5h~3j8i04GG{p-RI&%$5GmV6$XDvcR@I|$
zGoB4*Byx_Rlio2}9dWV-21u$lK_i1N80b}U6m0Q0m@4uJ=Ae=WiTA4&KU8FGjpKF*
z98h9YE127s&vjcvAlnI&Xx`pdJl20ujjZp?OfZD;R;e3Bag5;Sx}RRN{ndb-M&?1u
zsw9W2DOxma1z;HUP)fxC402bx0j?Pz9D)yZEHkN;kLQ|^V$o}-v1^qoD_04T)d?6K
z8q4}=)AudB@t&(qcQXjW#&|v|*s@VI>N2{tXDU3?W}Z<ISsQ>jH5@ZA!pPVHqPhkD
z0LWV?DXf53Zr<5-<W(N3WR?(z_=?e9?Q3-kyK_|9l(D2nDHk79lIVs?XxniZ#yO}+
zA;&${o!JI6k^N9|qWemEHA4vkZdrP(=&lr5d`9NSHA}cF25*&E+p#vx>&8bE5ev^q
zUq>Cm+;;O_chklj)IwMh;=Xx~S)!PBGHb7XoJ}!?Hg`PuuOt0Oq4@i$o}1BiPw=Bn
zti)ru3mY6~ntWE#NLL_crq)&Xa5*A{($ebU>NtX&lU_>}=_HZqW!1B(dOdbp#E3Yp
z>?dk2M<7;H(>8wG5U3oCX0>j1#5Y&1gj$MB^HX-&N9zq-X}Wt{{{SMsE021E)R+3i
zg)+$*@GD*Vxp^cCk%Rs>?zy_%puxxnxjO8!QreEDhYE4t(Wd=AoA&pu02n{W*JRSB
zj^fQ6ACRsY=}Vt)SPsXA7_PhNOO@s<^;$F&-HxAvjeKh*n&e_o9C)k`>Xv~Xsb*wE
z24&<L+$`S$qN(+8WLyLN)~P#GVvBK=qvn55>GDRt)X<b5_lhq?g=2sL)+?ua_tWdB
zY0UQMJ;d?{b%3zb?_ff*K)^4YS0`15P>n8*_M254k-s2UaQkwJ0XVBmGw(zI>Y}-~
zE}uRsOD-1+;<<Ba#Mq|V35CpUx5a9`4>U`AhGh(TtW5KJZdjUTo2fcq286ajr_<gn
zifF4WJ?w5L&=!{w&GC?OIy73U=YS1i{VlGg+AM6?n0WiGbQiIVD~+bTPAsv>9nn18
z@wiKCJ*!*Cac;_2CmdHU)oiXUuUTbNu%0WfqX!uuFU4g2Yi3LaP?+1zX7vp>zl)C1
zqeq?M$xo(deLH^8%llq|>bH7ZNa_aRwz*xjrDgYd1x;?fHK+*{Tyyp-FQ#gD$J>`^
zzg?%<Ez71~UD}zIqbfGA!LD)YPfaGDGtUHhRyjOZRgY}SF~KwzuVk+iBdBbO>1r@X
zJ~swp!I|Km<ZypxGkzGL3=b7$d2Z8N;Bax>PwGnnX{j?O#AcxrgAg0DUPC@PW68Dn
zJxa+L!Ad0*amj5t+)gOR0n>RL)VD~Ff=8O2Vo?adsO5Z%DK?7lbyZ5WL?JT1RAFds
zZr~*A9Ba;MYgy%v4a9H-Td~!lV$xPV<y}`|lZvmB)==3lPvWd$ypG~M*yf^?Nxy#H
zD8;zCB{$d@iJ0P0cGR#zZsdTVVx4n4kOtb4+SH<ZjNF=cCss#(K(A)@ekqfR9t+u{
za8)u*9W*HHe(T8$aj(d8PNfvO5u$xMBQa@Rj^pBo)N#3j_9}Y`ktYkBW~9_r5zB$y
zRdq14D>}AWh^m>dt|NbK613N-0Nw`%s$9VD$Qc#V!zBAc%ay?$jqSuTt2ZFlfL~lr
zqhjP$1H@!;?GCEq$>xnQw>YHD7`|v|obGf!*$NJ5cx5qhf=H_?fhb~jj2Zy}SsNoY
z_13vQTudeRaAe@n+PZ-fvbIecRu5!5AD7iba}qti>=Hi|Can<^sM&Bp#wu5k$T`Oq
z6ER5+<iH(Mp=4=;Hw#n{iFYd>8L3o|a(NW(MdE1(w0<=lc|nfqSqWLU4h98iy*|ui
zAR!?~IIQ^#fZ6`)=R>%b-LoDUUxF&kZ;&$DIM!Q$ckxn75;N@$!0#0ij}89-w6V@B
z#1)exJa<=wF=k(kOqu${J_v!FsN#o(2$(oO^G|tXTY|&IN&zd#t07Ds!l*S!$-LYK
z0jtX=R+-tsGB@2xK8ro<qhJ9~WYtq#1sQMh4236(mexjMP?E>Ut=5s~=um%^ne$h6
zevGk44zct7RcbBth;v(AlDkCt82hK&mxkmzfwUj0>e2c}#2wzk<o+n6^qr8${kj@;
zk0)sZbbibjn8|FU4F3SNNpB~%rwUs&(Bky%sxHxPnEI(>&@I8@-32<wlm4OKAG4eL
z4J&)93gV<lnpN5p82oo#DKsl=W7$zsE`@Fg#40HJc|Yn6@%uRhwoV=M3lsfSRql^;
zn3g+vK3|IN$3(RUn3tNEXtwMEv`dfbqrsK<2e}`!=g72+=3}<nL;lolwvVYs4$E0G
zN7Z++p<1SWv`z;+Q&&xZ9uyDurCMyS!8gb3;KM}KW&s-3Tn@4+%jvCLzTewE-|D;a
zX_2-9f&Ty%DbpiCjz8j$@pAnW+=;>B^wz3I-`l=Fs<qQ}O=>L0V_=56D%eD$zx-3i
zo`42pfPWO<#>#>2MCO+I74NnV-K;+p{{Z?m;3Vxl*GXF_fXOuFvxxOi{9L5>Vsjgf
z9{AzJe2>LG&qugo%_^`US55tlB==rAd?$|Te-|Ya-HFKV;F1Z7JWN6SRW`X2$qKTm
z-a7`nb<T%tIQ{^i$gH2KeHhY2?QO$j#Z@d*iscTc7IFKvkXx2uGI+%c1Z-JV^Rw|+
z`kjO6^1K(w;k+DiRTu_r0z!{9QnE3H5hnv_+B%|%iJuXi06H|uIRS{qDUKnERdix=
z-9U%Kc<E%(ZB@z1$mXy8JteKlSGZOqqAMoidEY4+A!^e_xs_!o6Ki9tXpqy+kfCLV
zgwGkFB-kVb9R1>-7<nzo&Rm*=AdLHNenZV!K@aKy?Xel}H4p7TaLB_av0eeAhn(ZZ
zKTQI1>|3GcfdDmCm)Vh6@H!Qeyi78XPj$4sPj&sA@_g1~TD5pCigvV;Bj~O(2}wXW
z1Cvg(hvM@yfT_^?s6II;0R)~YT$PkV&*p#vuGctZ8?q>9)xt9jWE!41%rfmN2w{$V
zQctyAc63iP2wG_}N@Gy&GAhRKvV{cXpQ@s>bvlYJ{ithCGzzh+ugz-2_zE3hhAU=q
z*)<HgHzBz-DoMEr1TWoru#5>5Sky|<?r^~8j;ga-MsPR)j}&uC457De7^Y7VyKS+n
zJD;Id%1JIpQE#Ij*6h=nP=Q4xTOKJ%48gKRYP5et>Q~b4m3^r5&2F@BL+MsXttj`L
zf9@5@)OxNQTqkezbTImET%5iU&TcI3wU>|fmQ1QU;;!`XLu!+SX&Gb3j8{^#(_x8m
zZ*HlNp;lJ<bnn{#0OGj0{c|Vw(nnVhq+|2RfUxu|ak|MorHJ~i3?84bKtIKk{Z_u)
zMFeHqjf+mVOae1p+@7tPU);`wJv$V9BPRxv0s*oG{{V_DH1musAb+)LL{TSEif~js
zg#xk9Svfj1$4vxhlTLFMLSd>!(&RYF#b^~r;AWeFlT^z-N-S8Qiescmsy<aG{VNDC
z{Q0YJ1_n-PCcrsSO7qUH!m-BsDnIF`plioUC;e4uN$gj>%zyy@0D48KpOznLKSf#6
za5K7^bkGiP4O{1uPgJY&_<=|H85c|*)I6e+O)VXAV?+Ilue<d1yu;-JpXgSdu&s)7
z0l*aS*JfPUuS<>iInJTzn@JqHL_x)2{X^)B`)S&EN4TS*iuwcFR^Z}?mevU}83+7V
zO9!pz$N6$$%jsBhb;0tMt7CN(+h&cU&m4*n$F(>hk;Q#8t@=Mr)ex&fo4)Z}ztsN#
zrk7T7Jl0^kgPd2P(fZaL+)IXWbzYH^A28(R@+kI(P6yDb^maoy3VuFCH&?Zq&g4fd
zZil0hNu@@gyFT7{uH+`A(Z`e=T{A;5PjN(J<K~?zo;I)mcSx-hN4K1ce4;OHc_FIw
z#Rs+8_a565oYKvey;LCJd8Q#NKJ@@&%}>3^EXoNq01fJJi)Xn`>d4H7<btCFR-0dy
zTF2WQHD;DyZHf*!<kVnij#e_evT3M_2QA5`F3qHjnqWX#nYdBVsKpU6ES&Hbjnm|f
z@0Cj7MF#2=2^an<%SC}Ex&^q;Gy;tspb?Gbw_~A2mG=mg5HaGTh5#4{@Kkr2L6IQ@
z@DGY$g~mO{0yPDQ&MVuc4YiPN@+f#c;WqfrIieyn{_(eR2=1W@U3p5t(jxPoMQ7QV
zsLW?4&1?Nh2uok_FeLX>a7zS{h$jJ94hZv0CI(g#6M=$FCA<PK((YWJ2A*IoBRcXF
z)Oa!m#s(-dAUT+ch%8QO`$d&xfZ9m83_GeIj$3G4pSnLtEVsJ}<awl&Y-G&Gs`UQ=
z`gQx1ALC=3nyRv3&$~FTi|QLF^$R%^s4Rmu%_ht;x?^bvs?F1(nlH5Zgm)@nUJrS#
z6};~t<YW7+c9A|VQ1V4<Vwok2D}Z{dVCYb@8%UeUIH$WhueLB(6pM17wf1-nDYsIn
zw#QG72&b|Mzuknc-;&mMU3DBB7xPy!PDERJW9F#!h5rCA$#Q95L@MRlvE`0VI-r<|
z2=PSR0nRf+M}>`#WYtXshC#Kk2qLVs_WuADJF4OoAt(7&mYB)y6UK2$h&t*O+_DkH
zbT2^Mg@Qa9!jdG5o=si)Q&ASPq-?n6xgN8ES+F#Yucc;KvhsCVE@hfgl_dbJ51|5G
zY9&Ykm#ozui#&7MZ&e41w)7~FdXOrP4|U*lv8?JXbbAd%_hEE(jU0rOsiW^7WGq<F
z{{XajkTQQR=uJydgj+mPFjfPc*QsqJdHTMNfBJ=~T3tx6vjek(SQ(&>S#uu%QQoId
z-RhGF+~aq7pklL<KIP4My+quyH)o;K<JpZQILP+6F`tm?w7!(}ZN;R3-2?fo%y!c5
zF8uva=2;d}rE`inGfNYDLze=|xjVj_>U&G11tl3iGe))CbAKcM02TA`y1sz}+p{4Y
z@mgO>`r_&Z_OBxkjyD?U<JCrL+04tLoOzEYT)B7_0vHS)s<Pe-dA9=+Z9j^ugVYxH
z@hDX}HFsgKAXVg=*%q2HxHQbKuCyy_YXfxv!SXAfT3it;B6+T->*<v=;~O6QS16qx
z8<5eC81stcYvB#_j?bsf`)x=l#_oCKQMOi)pbQiOd!#{#7Xy<?1p8#e1I2P=<y;%%
zPlQ-OAhx+#qJ6BtRnYxCt3jw)sFFlk#~40qn%g=}A;<t#ow67}k|rv{kVRRh)=Nz<
zv}e_6W0pMjbz57;gzTAk@Mw#uQu^Jqf}~ahN!PA-3wytk9s?TJ>35G23f!US*QV0y
zp`JGrg`WhmI+2$39)xYxrM6D|RbQqin!@uFw+wpvuAzBv9n2em&T9ec4Ho9s_uh;X
z8Sn8}wVGFkUj7ySO?qtWEPbr6{2LuVP>t9P;*OT#06U0cdah{|)X+CBNA+4w7Vq2&
z?IC~ev}rZa)Gj+S>NL+2=c1GK14xo++U-=2pevf`t9$#$jzjSW&3yrOu-#iSI)X^R
z70tD;O<h@EwbG_xW1cHNsOg(eBo&*zKT?wR=Klb}n$$^#4o5W>op{nGwog@ad2A(*
z<hiO<w)Sqg7zVfzX{vplRV39BU0W+jyo{XE-~`$iBh49hP$0!uiT2C4j8nz7gpbM4
zmf=!Ns$h8(?Lz1(d{?S1j6x8@8KA^I>D+p#ox5W?1ByX0$s0vJXOynUfg|pT5k}`=
z7^ldB2(Y9YvvRRnTW+&QFfwu~d)HP&;MCS~M!~m_l>?}&$k<*f*UCc3f+^!bSpNVa
z)fOlH#2FxiLB(iL=1ywT+9aCT?En!^lWORlu1bwS?SQj%D>`<&?gq4)iZe!IBC{{2
zzq(4uK+SE_)pE+@+Cgh|?SK_>e-&+{-TUWbjlbPfsatg!0M(|L@klq3w9w5Jx1pl`
z%(c>oixKlxth)x>VCJqecKiU%SKk*I_yT^#eRfC8O34_DgBd(gcC1wj1G!H^g>$%*
zkC37?;HAiFSiiAKYtTfltbEic=%X8Qe(J+lg;HX}x&cTTW?Ihp3F&r72GN7&y&I~?
zzG<-y>_{LJPC^in!*vTFOw6vWg~6%xSqxJv2I%!bmci)Y)b`S<OuI<#nl70cumnlA
zMmTQ8EyRQn`#3&mG`ccdZQIc9oo_t56gCJwR+}J5xKeQZaZXa(fde4<s_ADe{{X~h
ztu%OUFXu=iQgi<RidjmyU4*jIY}#3rsNU!3R_{m9j@5gEkN(;x(^?!_d<t1ZOO8pa
z&7@%N6|WAL%*rQ|=w-d65Uv#fqEIH@hH0Pb#&Rh@P`L-1*r1Z&rOC1Df=xLaoKq7R
z$?l}!jQOYIpbk&w1}WbqVs_FNUNPN0580&&;CLs^D#$=0o@BrTD8Tbd6Lt(^CXM+3
z#pn4{;28$t()Uc43Y&P{=9o7HIBWymPRLIwQg<N0rhHFT8lQ*&^lC==;}{sDvbZJK
z`1{UqY1_#=Nyw<k819^d11vHq5O^RqbHyokbqk&aHMX2>HRMuG0mV0D34voV9qohQ
zQ!|5sOmB`j=AMU)Qm~1f9;!tr89ZW^K&Byq*L2m+XkCz$IetLuyyZ&mBPOF(JB|+&
z{<%ES+Dj^ZLFxTJR<<$RLc7S}vDI<^09g7$_eW+BA(A3;7<jJgdGnfwQL>8m)vf1H
z%EKSkWzVOK)vn7SHoHD$xMxKRFE7!lrC6hlOD;hf{{VW^`tQ<`=ySaIEfbunteQ8w
z78v2Y16;nH8bYAGli30l3KzPDO+nV_N|C$$L{v<YBrO;tC$cC@cpc<@fU#alqNzI}
zz0>DkWnDR9xishb$g<<U>Yvkh2=SDV{FHT9ktleJf-pQ)wiGMLzxffj7!?lSoGHa5
z=&b-`-Q6{10zIsR<IO7xDUu_S2In1B9=yl*2`1c+BCIhmmobbhd7w3`X-qiX$@4-;
zg(cLHwENHiMMl%1R@}ejJvB!>D-&Wz-ThQCu`-gQKUG>lJv(Cs&f`vo=L3}__^H`q
zbuRb~jtAz2b|C!M1a(Hp5fZaX8c&Wot&X25OCU>b80xUgJ3H=>94%_J+k|T@wr~dm
zqL%|8=-x8q0gUrf%35gEP;PFpDjgEW=TW!<2-|LZu8E-N7PdotiXKut*FRb6ShU=B
z{*H#1L6bK<BQNRiOJ3a{a_E$Z{nq11(JZW#dvS(uvsO0nJcnW_$Iz?IJ4or}{{V{e
zy1!M)mHgc=qtR)7J{Z@<f9YFqZ1SD4ryqe<_F6$I45uH8w6}^FmuOL+6+C#ZEcH^8
z;OWN#k7N*9#S-CAW{Kl&00O*N0ZiUW;8iYPY_%cr{!KWKBATQcdkw3B)flb?eTj_H
z5Jz?5aJxYiz)KuZZ6Kk<khSBU#YPF=hU%VKN!qnqnviI5Tn_0=c1<A!kUUdzcViq=
zSQjFU<lu2kW8NuQurMhi$!rl-zoC~Lh^4VkMmZ!`r5gbQni@ADzZIV@e`ZNBlH9DA
z<dRK!2IGv<p#i>KQ^EqI=M{SAkT*w{;&?f!0F98BJk)BuaY)4E3=C8M0BDU_<p((j
zit{44+Tamhch4B58@Ry8sTP9F$E!UTrfV@B@T$p<7z*Ti-=i<|JC<aOv?KjMc(0(}
zC(l(3*05dMymtt!q<IzB(E7e@G}7)TKUL}2wGroreA0J=wO7wH^(>^bjt4@zU#xu=
zVrIP4lj1omisqMcIKC=Z45tKFxzX#fYT>#J@_M}%O*E=VY*Fr$F55*DaJ$G@yb5i|
zRSO`-4`QQhgd9e!4Q*VQiv4N;Z1>1;MP`9T;g>nB7PTUn^5>5tv-d$7VeRlS#%Qrw
zAt4<G%&8qzR|OhTwQxSE%axEG_~xfXib$B}4d#FWrHzV>r{m_VeLCh#zz^|JjL=&W
zA|zMi=+&R6a3pz3@J|@1Li_`?m23|+<>ASXo;#%7AGF8;%}s1#5d~gYa%qA~YDi2G
z$?x$}yy7p3<24S}k%?Bt4}<P7lblfr4yhz<0~5*dS$LV2>B~e<(5uZ#XjJWEfH|tv
znmn_h$fc01d?h5dc;igT=74zCGxBanVx<=oLphcZ0v_rK=Mn~HQIkqbJo5C&Fa<YH
zlQ&?ibDDa4EdsM1hl+<zjDGP-bXuhm8oO9k)Vr?-gIHf&=&J?6xq))6-Ya;G{t}%W
zWKdTsm%4nm%$u2qbxuh6EllOQbj=)~V>qn^lQT-L4}L12Qn8m#zG$RU26!}OoXn*%
za490iFQ(zc9uJz4n|fsQC}eH!rO(ESoshw~IUkC#J0(!dAS7nobAgdnI+-#^b{wBI
zWUGK5khtd+PpUXcF&02RXg-JmI*p}#`KtCCPbQ9(?)d}Hni^z4x${-B0~~NjqU@J*
z0uBWaE?HGT>WtGtIgIy8K+?+|0`f<6b+K2OBu*H2Kz`lg`8PSE^u$o%ywzEvoL!gU
zZAl$x(9$pLK4%|w(Z5c#<a_tM9RtO4AJhGpR}tDV;zNU7+eNaAOSV`bQ-C=&;IZjn
z^5r+uJ!JExfkk%9*=`I_l1GZ-p1s%RZEXP6Kq|lO;*^CQ;<{$FsjN^jC!t)ES4i%!
zrJNwiC&g&j<xql8$hsI@_fQqQ(7KYxE_~2(wa34dLRO2mwFt%b6GWL>Pw_==#Qp_v
zQ~7YL?Ws}$OT(RvF-`Yi8DomGhe(=lnTU_<SETgp^ETb2FZEVO6r1UY^25xEX1Ynv
z$ol)FNYxvz>g4)jc)#?Qs)}tNP*H`5oPH|LYD=WW_o3mJo`t%ON&f(6J~DrWbzL?H
z!~<gTJl7!fmXoOsatC->SINb7z~<9WX?ZN5@m*~amdcldm#y2s<K)18qu)d|0c>O$
z`psi!07tlUL;ASiU;3f$oHC~FD5kY$w91pa9&44al6h@2tE9!V8nCNrRpK0H0)(I4
zc3_Kx;+IBdiP>hy<1}kW$#GoiJ&uhuldw%X-eg_UJuB)L&|e6`7l`?*7?d*Qe0|dU
zdz%}1rFmBCXk@Rl!qH`(HDYi_HMr5|6In(Z`7d>WLo6wP)uQx#(m9m5$Q7^BayaMP
z;g~%}3Rv&x=-O<kAs%?3b@{FT0Ba<Y?ZS?$){_LXc}d0vbNyoS>U*g-NX9aH&3Y|X
zsyb|2rOC(B;CZJ=t-8H0cK-n5B0TVF&qBNlC`CULKqQeD*y=G}XN_-S5dsG4^15uz
z>Qtht+27BOKF-!}t>!5f=NaOsj^Z1*9ti*-{m_XCdmzex(^$=0R=>5HSm7gYK#KLV
z$sBRrE{{J9ZzQ&^$9kC|vAZae0uO;%tNG&uHyEgGC7x@kB)gEi6Tmedv1aUn<IX;-
z%jzB}=WZM$(Bj1L!708(yF20DhN}|JS?3I`im<i8ghoLGcbc6QV)+VK&BD?dq`C#V
zwv{c)s5^k-qSBzakqWzDim;MKknK_r;+m-G&>^XJx}ldHkjVo?c|6dLS)T;tRM$TH
zzQsJ$L`@3gZsSb_z#Z7G(L%jr?xKt>*C<C6#*OX)5AhW1TT>S?lYv$wlJAjdisvyM
zhnky4a!V;a(3*_lavQp3gsxe31Kw)Qjl#rKod#V;{{V1z4l6mWM(|ErKAN=GzaSr@
zS>0mfZwa?1t<zra1#j#Q*-^eyoc{oIX>4KYGAe>-E-nrcwk;c^!9BFNVqI}tGQl-=
z2+NY9!cff1AjU@&J;PwKXCj_MBNC$~hLT0zN#OYv^`?Bx2I40!3h*iL0+Eti1W|GO
zOr^_zwFt#sNY9$8sDyfs$)FO0@&^@K&ZP>FD$?p9`(3HsQh(^aToOL2tc#K&NQ7~#
zf9^d~6_sDS{nv{L9P_keiiCw(I63>OLP+je{ygHCvTc_naZ$?ZW!er0-9qvaCNq&!
zaVXd6cPiFro&`vg24RNycojisHe&&PG(=N4kSdzerE0)kMhufIQRSJB71uonp(TZq
zO(KtV#(As<>E@ho^;LHn3C(tB!lEc%Hjb-alNJ2gR#|u^GGQ@{5ndQ8xYJ#?^f{+&
zV4JgDDx_w+B~S!Mo@o>X8O?en$iY3*`(zLYG!RJx4+Avp9N>3MLCN`J6zI-QO;7Ma
z+zvd`z#wg5-7(0>$0DUh-QRT)2jG~%1mcz{3}Xs$n)5aQ9aB(%4n}AtVHQKb`}?E-
zk%3CC6?2-K+CJEDHmRbx;0%B%3_0V?OTm6RrGXHWyPw4t_7lek?h4@#RV9%pOo;;o
zW*#ZTkeg|tNN!FsPEDZlbDAU)H3O4UMn>%7yyR`b6#Ho-SN{Ma<WzFv$SjOW1gOEL
zfwi;ZoCe~>Mi<2>LP!L0Nn;OOxH|yAuVZQ!BcH1B;096MBNE5@sDODgIl$(nRR<fh
zk2MeSKZ=~OR0t@(NQSxq-xE$wah}C^0tjqVk=NYG9DPt}(JTY?3L8CB(vNJm%4dyo
z8ywfncjcm);Uf-@iu#^dgu&Mw5OZHQ{-){YPxUOJMirQv=4vp<8{E8_amlqsLVK*F
zImLS3{q23ljxgJX4OYFx#{jn;4NYk*U>|Vud#uHU#)onl*|y=2iqY80?2K{2@mW0;
z!(E+&<JD?yW?0xp!vpS}J_3THx%QU@3uN%L%ZV7KL}f>wDq}Qu*XtkxL<2ac-d#Mb
z@~lBr_{9qe6ip*-3~)SCZ()*aX;=UiSCddHmy$In2kNEN8c40mHcsKy784V8yX4`H
zL9a~ev8u+nZziX?5e9Z)wSnMJFF7l?t`@7YM3#Io&-~LSx!lX=^H513XWZY$pq5nL
z3}%tahDa*r=WqzTeb)ET`Xu^{al3-Bj>+>`zeRd#ZEH|jrUXEzB>mTI((Pllw@Yhu
z!l=g;#PttS_LGD=J3gQ3{^!np1nG8G)=>zp&=0eY>qVt$vs^HCTrEqWXr<6kIKcX?
zJaMWZL;wL^KQ^hh>DxV4Ejmk*3riTS)L}tCbWq2N@yHm>Nf=#+ajZPantDS3U%pK%
zaBvMqN6U(G6WOFv@ERokLk677i~uSn&x)Bz3D!^DSsp}u3PIb93SlHJGu?QSLCr80
z$Q30dh>+vK7320euM#-VCY6|y09;bi<ResM<kOizIUQ5j5P3A!zyum?sc;LvL<~oo
zZsVM1ifeaG8P63{o9Jz%fQ{dD;uw6YGzYU!V?{ewz|e#q!Obcvj@O!KZg34hcpUdp
zZS0Q5xlqG2<TxF7NQh>T4DxD8<1iA8bH{ZrkT$;`Bza_PebR34ia^A3UT#}7x5ooe
zfS>NBibepCIK?4k5r79Z60<2I!K)M5&>Gx_?gt=@Qn6B?W06skNu_0AGvbv^u+Y{}
z>QsP1>{mPW-=We;A5pMC8@5qh6fhhC&3KLAs|@GvwP`hT(_~Y|%Cq2xXgJMqeAtmo
z728KBI8(?zDov4@i2ws#7xf#`Wfk=s1;OpWu3|%@kaA9W74%O}*F#s2e0%bEoqmd1
zY18Y#F1>FPLQd>rvr{}up;sk`n!MKxfe0#b_gVFjZ^IFq=vmHaad$zvlwzPWxhISR
z-Ayrb7TvsNn2wmn&bz3XtZc9DN6!i>+tZ%mAyvsrsNV@w9^Oq@Xmc!62-M(?D54`@
z1WM66lY@>0J501`L%ePS4#A+7cP_ET{EudjhTda=8x<$BRZlk8NT)7!2SX$c6rUL~
zezQ<bDBbdGRPxxgb8crO)Heu*KuG@pYB=*Lx_yt2BAP0-SX9K%f=Q}-a#WRm0j(~u
zMZJMyxya(O@iI)mBea87HaRQGg*GXnBzyuCZazgUh=$~V{{SBp<pL%+93D4R0qB=7
z9E=93?AP18NY||KQE|6woV81)gZs$U$49z^MzO|YRx6AQ8i^hhaI1jEibR>9JbXX-
zp=X8>8oy2`Nm?vMbJo@=bpe$Z{E5lpv6iyo!npo5)NWC3BO!nsa%-LXj=}7&i38xr
zAl6o?h5d!cBC=;B55*m5;75bVs!cvojjVdBPYZ_c%{l-~(+!wW#Z+ppjsO4(uxQE*
z$oW%M8ma*KP6ZY~BvK#e)kzhkw{rVd@w1=6rx>JIfFyHUZ`1e;7SA%px=ip$th$W8
z+#HK>W8W$%bl*YxhWBpELILtcUf6z@Yq80`L;|<^AFJfIxLaF@LCL_v*GaVY?kyH(
zAS;a5meu;Uvm3ajM^7G;7n6Y}pmhkYkleESVDrz4-s%2{(M_N*s=JZSe(PVU=&{@a
zVt<$SRkn6l`gFjU+0!+S9<`S@{Ez<tqf|7w;rM`hd(-Dm){)j6Dx8ihtXtm2`$A+;
z8x={V-dW0^%9~pp@mDc6l`3GqD*SozzEdt~nH^lU-0q`0O#=>z0U#WcTh_MH0LUVS
zc{Hd_RBj@ZFSkBrLq>Co;LIkGZi8pC5BpJ;+6sja90Q*?qHk`dmVDdW9kfNY&Y}PU
zJ*!nI;)CH6bfmdt5$G2U{(uD%o}RcQ0;3dsGhZAMYH!@#=ZD2uv&2omvYhA_YUFGf
z`>!vjZr>*WRm_)nya4=EO>=*qB;teANA?uIU~y==giG-XF<z3!``RK_<LZfduh0Jg
zrof@5H_i_GjM4r|T_9KNq17~7PDR3|=JQlCX;GwxEbu(mmh#QaqDiDL{`ClgHga=Z
zy<1s-wAqI=ptxpj{XCK#%H31{0O^h**aua5>SG6DlZtkoa|DiKlx%O!XUC^mT2F%6
zYG#*aS?uMBNC4)W2e})3keb~#nk4y07;MvH(_>aX<*t9)wLeNvwkp@jBQF$DynCeh
z=c>~BcEm>x<roc7-RYaa{{RUJD&D|J9iT|zIZ?%I(&%A^TfCHI)XHj`BG#@AwSk5<
z!ygrgoi62L?c|!#-dXB1+%wA?9(bqQTgwQ~?eJ>dMxHsEQ|SqhE5@!SRxL99n6U>n
zHKw0`9nzT;5t`GMWdz8=G3J&DEE^_C#%j3fH2B(*L(5Z^YMUgRRsE%agJ~{DVza>`
zmCTN%vU6HbdrBi?xMBB-ic5_y1U}qb{{Y&robgFhWl?jBv}P`J_#~7(F_k}oUj2h~
z%0J$(WV;eBZLEm=(1zOPNdw4s{wtNMj+&cYmn^wDY^#$;hD?y5PvWaD^tob<wt~QQ
zTLq6`r7^3WtOat{C6Lt42Q9UlSnnBHDKZaut9h<KK;}2e{8rtp?Q%hh3H!x+_EzKp
z?XMq-vRKo0X?~-*a}x*=-aYFR?mvoTaa@Kw;bWi0bOCP~3@mGp>Xt-62l&@N)hD#c
z{-gC1o=azSG`ms;8T?eSX?L+9U5%6Yu7azD-LZ530P51bWJkMb$^2EG-L4r^^#}BF
z>zxAbc?bB9{83TpSC;#BL!b7phJ=IsK>q-0^DE;act47<M&nj1*Gl>@mzpiykpdIt
zj;fA-OKMWdyV(ad)z&<YSD(C6tSnmuV1HF+C^+=Rmbza@Ah`6FuoL`o4AFKzm(>y=
z9yQ%vP{6<)qd)CYG!5AYKh<3(>V;a_pEie&AgMY0&@!X@%)ksEx{3?6bx@-u9%ze~
z3mFBuHTDO|;t)F{bO#hGheA{UD2qv5n8n_DHRg%FT<#*3fE@#Z8JJ?LwF9=}b_UT_
z6EPSBP+GD_?Fm)+@+zFYSW#PXD(>7kG%%RtpETJ?CZTpY$iT&AY+^=m$8&d056wdF
zkxDbOKUB6rt9h9v2X$K6$-Z%(39RzD1QKfFM<z?S{<13UZq)&!yMB||1hV&;d2cn)
zN}(WQ1Xg3vF?%{ZKn!GNw7ZKp1FG+1j@YDRlec8fV=<@$3_GST$w<!vkuj33-8s16
zbSlXG2+1QMaC^;4h)zg7)CI#80dQ(^+fx{#(FOnmW4f86{{R?Zr;jwqia8;^H6qC(
z#!*gG&~`*JKaLoT;-rrfF+EZMHVX25K_Zxh@r7mqxyRKk{@87?sip(5eZ})q0)=-I
z@;+&N*hn34F^+0vR%n1`AP%V&H%K?J1Ma7uL@@hkybm=S=7>R#cr;Lw;iDOpvM+*Y
zNXa=BI!IPMm>AD>B!GoK<jWjq>X7)ruX4PB!LKGy9!)ajgV1hpI25u0+W4q$!$O;*
z`5I-6l1=>7gZZH9!nqHH*;i{W0i<U<s2}f6oetIc=kZP4aexIP=a7)eDUC@3KXlu9
zz1)xpsgQ`<lnf0zF%ao-%~L4m_f(Vk{TD`9{kEFt?DSx68RCe_0fzqoYKiddFfaux
z4X2)ds`ty}q<b9Wo-4`$ZK9hR61hE8ii@?IJc^c=l2CxOWwGYCckA`Ak5Z0MF_F!8
z0#tmnn&dzIK*k^0aCxkHzF}p!yEyVnyEc{2)p@dvI1QSW{gsbu^5&q8nOGHF*gS(=
z<3VL%A&%xea1YgLQcro50gA?2vx&rwfzJZA);MURW?j5f$D$y}txS#l!6Kh>vcj7P
z`1z=Ve6WOyl5;aef7CHij6#hB?w}kM@+mH*2)jR-@M)TKXk;LaFz%M=R1)2ODiFzQ
zLv_In!Aa_=F1^TpEZdf>{YXO0OCEe0&QEZ|wk=djR78XCF5!{|i9i@0Do;%5bL!fh
zaK@)*a!=x^n=I??f=eE&u>PFsV#7;W<Ul2mWLG;|srGT-lYWOy?_}qsYjj&Ebo)r{
zU_cH4IP+Wo0840vm2M^<&^1Zu{UxsF6Ux8iQ=jg-96MuBh%g6=@cO++G}-O6_$oF_
z8i>ZwFa;_56r=3(NZ-|AYL^C)#QX={BV;c;(?B3{Y5m9fprvH05UwzMSFn6_PEvN@
z@&zV#^gE%ob_-*W9=fR;C2*$|5htwE_zRr(RwuxX9_JV|<_Z^%>9Oawq%3oQDlWMv
zlf`ldJEvrVE5H>wuRh%59%yt+9_31Z_f%}BZY#(Rd&s0DiZXV&0#(q!#X($iN)(fo
zG`m4Lr66PCd8*VEk76!E0mpQ*jGv7lQW$Qp79gFpf5<%Wcmac&YVxESa^bt~n~%*T
zmrNol+H;&!-M8S9N&M4D2Z~I9a5<%1b{etV9;q0Ez@$@wlU@Wd<INl3lsSz?TzTf7
zoE#oSM)=PZvbx|gLzRg&A$%_*H4+hyd#9J(g#$Gr>NQ|%kz*;4Y3=;DH4CW+>YS&H
znrO?4171&DM3X|%NCPVj5NnKnrFwPro6~gyA7Rc;2UXg0o=K|z08wd>>iSf+(x@bZ
zgZZwfr)!><Dw6BTk6ERTqZ^ApW7Zl;AYmCCeb!wVq^EY$$BNr}ze=A?n&#R?E3loX
z?yy-0A-Evd(J^L@NU1KF;d5e^DY(nQIm^j_`TSBg<vapD#Ru*ca8vJ-M0>36eAUU6
zQ1nwQncK+vqAV2!;81>4RhE@_G}qJy1XL!L>HCfQNhH)#PoYhYfQCu()u<34P*XUn
z4@$!AB$9!Y1EX3fZX$9Jf}kI&!8h_|Ny6Q-R<(%R?q*z`vsugVA)JO~BgQLVE}oM`
z?yG!pSlvn(BfFP-l1EhdRmigxRq$47q|XM-@mcGbm0e?m{QTAJ;FH^D3&0hXlIg#6
zSbRt|y|Y6kkrpY!u^{=)NTid5AgJe>hW1Zz1Gne*P}@pNZR_zs1%zNrlaQzGiPK~M
z0JuO~Co~nRt2y4qKs{Bpxwrkye!=t2OXMb|Mhk4ANgQ%0X>h}YJXFC;#y0$_JE<8X
z0R~&mH4F&y1VW@^J=R~<)}}~()jNQ#Y|x1y1&1S(RQilV_VNSBJyv{(baU%&Sc<x_
zb!7_q%~oARAh;!bnj+E5d8CUJyk%B{ZbAC2?yL$(0XZhFY~<buCxPawwQ#FA^Gw+d
zm+3#Dr1f3z`CR)c$u-!neKn@&aYt!w5WpWIMRD)aZ(Gahi_M!iLm|Nx*zUEI)<jnj
zj0RldyeF^Z*27U({@K~lEKVGDRvj+L-7Nmd+`RQrdXrMuTt=2@vJ~(t)mv%76)1C!
ztTMIB#jKtsM#qgv13>j$k!^=jLZoq;-01V$>Z~KTky(#Iv9~&Pt=A<0Z@Sa|o#_7n
zu+<{D1hQtI62Ym-QdIu{LeF#~W`%T1l*nsg{o<T7*(3h|^dI=Hh^~)<6kw04^r-_6
z{Wbppika|fu{M;sMN!R>_&ML~<Y0fof8vrmEO!Qf+PV_Q0)ze~rh69Uv+XFXUi2YO
zGA-XH+AyZo!(n40oUu+<ggF(qHun3o?TS+czJ4)^SLzZ^iWyOiST2}jX&A3OHz4qa
zwuuxu{{Yhz?`{Bb?TU5k4U_)>3`1Mp6T%$QtQQJ!$ZKvA4F3S>jMt1%=a|;4j<xq-
zztM)83#{NF#dulH$%3Z0COd!s0EQ`z6a;X9P?oNJsUW7lj(skba{0oOLQkX35ZuER
z(!XyJ<#AO$r?=as%D`cO;EJw3?pdYae|1F|SuF?ykPqgg8d@`MZJ_;%th&^0EsGiQ
zan(s}>LqJ_WAR<D^mljeGLPm;x-YHmWsO55je+^2jM50l3z6j3I&V|l#cp;*m8$Ds
zQ)(}lXh~f4S3}b!AO2te0HU|4m;FwICETjHWRDdCYfBarF#}kAOVqlh&AE!+&;#Jr
zHQCgs)5w!@PCSaeXj=aO<O0-7%ubVat{_~rk&3UEQ;K1qc|bp!=U?>wRC{7s+43?*
zd{KIQH!X0`Fmi{C(Eb#kb7MV2@QrNuRt_-p=N<D&9nO-m?%tb!HI2N{ukGc0vz0wn
zY}ziO6_CpyX5=j_y*4ko0cs@t9WEQaB&I{eMK|}_Yvd%bjCeJMxAg7U)05ko%b(?w
z)k|UN=Udd4=2uku6hG53wfPhIGx|1bduW`A(0&1_4egfQ^4uUF-DG0Y<&N@a`AC_<
zgF;+r_pY)mrPMd^BgH5BMjtX@{!HJ4MSbd>0vU`ea0?GP;((jhx-1cwlH~~=0jxWI
znQ9j^8CK>(80We=+w^^9KlfLT5BFAors*&7C%clnG$+@(Gwo!$i9zoshmZ9iMqA%y
zt`MjG?9~O|==>43Be-WD-6f~!+%n7ok-ym0i$SGJ@fY05_IAHSOQrgf*w1R=)OJN_
zfBZdZ{{XhKf1>`JUrj?biaBK`Jl98V>&begy}YgwRFSV5;X5hDgK7^dE5|~$J}TZ*
z#V~Lym+r|rEuyICHb?y;oOC-B94>0ck&hK>)jyzY{gt2eTTJI@q-`e5xA7HdF^)|`
zoVOKR^;r~u$W#@8f6}c%;RpOzzv*^kv<LmD_WtRNc;r!9nV{@U_zK5iw%&sWHDJMg
zkbjm&DxSsjn3V7+{pBLYb6<Z>(c{rPN~#K;81+q3vBcY-4MUlj07oR$Q@aFG!!4d@
zK8Qr~ZgaTg_@FMqkw|m2RJSdXO7Lho-HLaTYKiiQR=WjaGef&%)8!vCP?ODM98-V?
zG^L0fbxh7_fB2w5LCD2idVeDFFRj9<)cB)xxlDHmLGxDPbs#mt^;e`o8ZEh8XK1Um
z&ImnbvY)2+f4I;gImy5ks{|?>_g;x~VH)Z~oN_64vBPvL#O)xOl|hi_AT3y%KoJ8g
zZp9E)<z3(SsFZ+qRG#Bw3J(<<WEg%8f-runSn^Qdb6#~q!Lmukc^!jgMH{tb9;#D`
zMJ@uibCF6i=X2AxDO`-w*hw=RAPO<~rn-fXlSrw7xqsr0k_&OCtjyVvo~c}vSrP$;
zPnH|_)Pt5I{U(w$OcHyg#(86ppedVI!4f4Y&t57l9A_NUx3FC2@k*m`+n(uDXd~f|
z+&s4x;G=j4toKGkYUyaw5&)qW?u2Ezx+N0J5}{BI?DI~Q(bYmARvns+S&7K+^HMI+
z{JTF?TKnvTY^)e=&TGsFJf11ejO;LJvJ;)S&MA-wM!|XRqyYiQ6$r@jQUQ^IYC5tH
zQ;=By0JSMog2dEd6Tm%E<0Ax|(4rD#1s^Qeh-3X4ZWTa3RIMiF47>qABXCCqd~1pR
zyy`KwsTH!nLc2Zv!rR+CvK%U@Cca<&LDYZho~52`+mTqPtUAejXuk^P%3#7qB}fEU
ziFG*dH3Oyq+IXki$V)IhuibOYgJJem2IR2rwK_f7x`_td1QnK9A~1VM0AuE_v?&T(
znAZpMPZmP_$PdIOe>C#f82B&v@y!Kf(r+M~iYi-%h8Fn@9~4-`KHy1L18MSTeM(}0
z{{UH~wYh>u-2OaOMzyLZxZPk<N>mdy>PwfBal3(3tV2Ez0*=>bTZaU2YOQ%VCbG&_
zGDers+F{gnblfu>XC}Ljjc8_?D`=PEFe?fAbEl=An6s7ny>?I2{X72v?%jCtn)1G@
zskYzI>wPOrQD3uZq*<kvgmI`D1DZRA1B@DnBrYmgm=RuO%XE)JwvixgEhr?9>YJV^
z7diT<*CNp6Ic`T3q~PL{7&y%}1|KGeTmX9R%{fWk%{7SKn(=Z5G0hvU!B!~blU@h!
zy7F!VkxP(HaZw-&8*$rC7)In~n4|`&2?263Or3#<TX5i0aBy0gB#RtoMqr9NC#uN$
zll2$VS{g98yh$V<=|-ZVERK@42GmuvW4x%xS)^tga!9TZuYS5jt^WY#*m68>tflws
z2dHnD>>(Hb0JgQq)3p*^QA^cfPOSQT$^iaKKi-qxMd#dK`LB~_{X^HJ<*y_k(^L)X
z{cz(K(*FSd)XUPc{ekN-BkQp-Bw>{2^G<J%ITi9;KdC;YgpY3Jm=Cj8x_|42vu4Q^
z<Y?*S`KJE>OU&0Cv0k?d?}O=8Ic#SXp__huS1<IR>gI{8vPC<45+5vZE2G)oTirq>
zf>n_Zl{L-FgDx3V)f(YAWLF1d`8$cPWD*GOlpLm10x6wVZigKFj0$uLI^9hJBA7lT
zgT`py)|o2R6G)>!7R@jm=9Fy!;*)qB3TIbfLXZINoQ$YNF|tK0X_n@KyR?Y<<Vjo?
zz~eOWfHB=Qlq{bvso4ba#TeUdvOc&xNC$yS^2H-JJkpZQjxkQ@0OlX9dLfriI*cju
zLG!?`oTnhp#g_mcYwX>2$~{9&p3XoBbv5!wtn{|g_1lYS6oI_s`mbI6Kds#thF&^6
z->zvo%)j)jh#`}Du2aaPtSuUQoJ6??zr|NT>$?LWR{PMJXT2Y|In8=`WTj`5%O@3R
zOA9%FY;0w0#(Ys$S_D?|i54t@-xY1EK^>La^Uf&vZetd};BF(F)>Ss!Gi0{ABTGRH
zLG8Xf`xWUoK^bWy>zv|=vX3j0{GNp$Evq%LiMEz+x(r0wR}QysZA@z?&#kpdePukX
z^8w#Aa&!xLW>-A7XB2GWB#fZO2*9GG;_VL^G|qc}J6pu@xbay>kKUBZFa~R9>hy?4
z?4%YQ;;>5YOX0v#(5=zQx+$8Iu$SE+#&FAChB(k}X$J4AfoC4h%pY^fscm9c;zRPM
zvIV(}TbVa=^-^dr?p<W50;7aRkjR8*@@aG_cHM34W}+dswV2zwqb>su>2FWBAS?1y
zQCmtI4I<<L#d>E~Y^lH=4OwFo$Wh@XR~%I>$VSM1g%Vm&3CYDGy2}G0KuuRZf{b6N
zV2>rydHbqMd3L_iaaw&@3{ql6J5P$r-oxzO6VB7kX6P*!5nP2pk^cbA1AN=2%!eYD
z-y1^?g+Y4i^6(BRtOUAoG2cQ|WN+%b_on&Npl{-K^<O{f#4Yq84&%62ef05W)2;Um
z9ku4asQASbr|I3Y=o~QfQ)GA*6kr3@Oq}QOUM`Nf=r<O?6}$94Ndda7MY^$kir#u1
zFrkR-)=ar2kL4FbQfisMkN*Hj064`c$?BFaV*-={D!h|&Gp-LFtHTxQ@T$1rgJi&_
z=Nu0;=}*WCN5JZ-leXCCP;hAh$?lPP;=Fz>Q>34Phl8GT)gwPN!busV9Z;58J`og9
z;N0<Eb{N3!nocqWc%CtiYDMtvlDQuXcr^~BhFf-xhUGpf>&XVCAQi5^rsa}!in7HS
zdA&2G-C>x3e1lcj{*%+-U?OE8_^mzVsCL|huk}od#Ah-RMS7_EmNyiq*_YXA@c1#(
z`exU)055@0gVR=s-w+Oa7_DU5crC#=q)QEro=qe598197+>E@Qp0-&5P&oWkq|vO5
zc_=bF;<X0NbDn9fXCII%kM(%p5gXZmW>GY26gd$ZmOV1YJe7cF?-iyNkuDAaryFR1
z$U#*<R-cHC?5E_*wY{^i`5^}#aZt9OZ8<OP41x7prh*aL3Qya{IaCAvs4rKOT%=E7
zE{vhDfnqpM{8yU77mGgL2(4G`qbK+ZLv0vb1RT=K)*M|@3czH>oSQ)jgZ)ze$|v}`
zjDJ<C{jw2_qLcQK`B6zPT&2Pf@$bn~VW&T~Hba&1Mopn4fG|?5d#lJ4M-T`wDea%S
z=4bW2Q<G72hgcwtoyVr+Vm7zO>Xr>Ado%Hmc6`>MK`rLJAtSm-dd{gUOfL){1#XRG
zZHq|54Ao<vbDBVME6x-sDspQMZ1dE~<e2IzJRw|s0}g)_&lTWKDn*6abDBuQ-TWiS
zTie?q93D>;kCRh~2{<N^pd*acpTf;0dw2)A733e0#woY}(I(vEIi~*DlgXxDpQ!6-
zJ=lGNJ~5haW121P$K5CGfgRER08P%{ME9b8ap`jpcNjd?uJVd^kLt75lDslx4(j$9
zeaOLKo-6LKcBt}1X5$Cwc%^6G7I*VdT`C6>j=YLslsMpGrtbt9aQ;8@LAzw0I;khz
z&w$)iEPh}7)pQUE*W_C?J7k(B@4hHd#CKU_sf72MO#Ro1=DaCFIAW%^ZLTU6>Xir$
znlW_9&hh#ms{7WhX6?%I1$9Q<fIL^s9)#B)N7f;oTL@SN{;TQDCidG>)1kVzV8N6V
ziu77+s&Ga;k){;S5k*fm<pd08fldgVw%q<Hq~nlz`mGD-tdVx(aqgr>V}w!3sC~lY
z1aneHE<QVOXxIr*HsS~w0+up1ocH3Gs!MQr^HO^Z6ddqrTTCR0lDIt7s^}xZ6#^qK
z^eRBO8_!g&SV<xO03@MTfP1B%;Kdl{9SUNSKZ(UV898B-OXMR4d{mJTS8*JQL{M^b
zibUgpbDC?iRTF>=e5WRmDUFBSB+@#7t-%%G1cy7Zfl7!>(#+XF-x^qPzYV~sRJUTJ
zW@8WpW1i^MbV1{4$&!9FvoZs<dhlu;rG`nTj0OSu`lD1q;f7T+f%i)IR>I-A?x2xI
z^Mg;1f+)}e+0hegD9uFMaloj^1&Q5KNV@>Z+(_|7MI=iRw(SK@51Lk7;lb*VfES+4
zI5`n*$8{UvmB`5fBRQl8@%_>>78vMIx}D-#SjjZ36;yGW4qH{QrDiMjd(=Zu(pt_b
zF)X782deq{cP!<>B$Lf)eSN9iYP!_ZJcOt}5m}&D8%GW1xS6v1lCxxEwNN{j4e@UL
zRESDMLY_}_$wKb`0Pd+nryGaM6-|tK+>+R4z^w<TpJ-*tHJ(Le%V)D!dIW)kIV0mt
znG5x}Fv*_=n|(jJoE&0<xxR=x<a|X{>eueFgAXcvQn}E{5>0quEZHOKsBiAonVWGW
z(rTBapi{<bpv*DPHAJUlBzo#*b_YFCIz_p;zlJh1xrJA=seGO*N&0&Lxz~}1$V$zY
zdtlji2ydE4V)XTx_U)}ub^xx|q1z><n-m~m<C^CFgR>VJs+@3~*Huym0Iv<N$#SFL
z=yB9+kv@Q@p#b9)+%gV)(s7Ex(X+C2<Q<}ro_VOziuJ&$6aq3$MJB{*h1{g#yh$C?
zM%6u1xdi9`02H?CkWu6E0-m`elTB4-8OH*=bN<wBfIKrFh~|^zq>mJc`+6MH%Wk5Y
z`{WfvPB^MxR{EDt^yZ$|chf2WG6s1x?^o;Aew@+ezr1x&pzQ?qYvbS4{{UU;9;wwN
zca?>>Y$5~Vx9GGJ)XV7^HG0Ttqi?fQ`jPs{t7`K}ZKg`r_Tb<ZJ&NTQdX3H8=1Jyx
z7@U$?f_E%wnXg}`(qq9Iiad^|U6(wrOwvcqqPB8s+e@`(w+u&hOQGA%a;X@}=DJpq
zqdQw9Y`EjP&8U{n4Z2slnUs4#bCH^ftrr9Z=C;Q|ksN}&eb9H>SGGPQENd#s$+e_q
zVTk)=VyU&P#^W`%eJWU@B#!y4AF2$I-)v*WYtuc`F>57Q#@k&<84RpfA4a<G=@05X
z)|+)5h2*Uy3_$rFtBIrdqpU6(DV5ORsmQIGoh&sFags3UHFLo=B+tFH+t@E`<DT8Y
zRvF@nU^e7(Yl{AfdbG`99klPb00GIa)d5$JdYpnQ!0NQm4o3<-zATF*6xWf^9&*E~
zY}siEBLl@JBc7>7SZB>-jkwb+u54)U9a0hU>XaN3O)BTJ)e26KMuNexYGQsk#tk*H
za6zPz*q%9~ANJF?V0@mC@B!w$81O5@8*+Q3+yUyDzcxWmcV?sw#tGu0$v!FgJ89$k
z14pvGd9R;;`i|08*7-FgLAFjfuKU@|b5H)Dtc3oQS+SED&29AD?;fHuMs02-lLVvs
zK4w{2+bS`GT|e~V^Zwb<pJ~S&*DPrX1g&U&GkVcPA9)0KUqYL>pE}PT8PT;=d%2j2
zGFO@x_Y5G~#N~X{S_QzncWEDtRQD|g#`f{S!KxQ#NiHb1&~8Mr1}Cc1+p@@jpY}C@
z(C)pRK*KK)$^1=iG{z{z9#g@l5g^O498qo{bH!zSOE7{}2;82i-BS6amBBbfJPO9^
zT7<J|3-K+q^ePc6VEU!I&21~Q9oXuy%@ap4XXIz9==y<?boY>3umx+H-Q7HQ3{+#D
zL9G~DqfR#HnPbc07?OMZQL(1jat7xfYOGw9mv|T_x;8thFaTcrtJ5r?jN(1a*$1&+
z(NO;Y)ESWR;+Az*g;`HSm8H?0VYwyS2kMzJ8oL>a?)O`eI<E-~rH%l{Ra_f!%I61~
zSma@!mlXC8=a6g)TNymmYdS-cxFk^5R}p&v1v`hDu2~pvkRA_;R>+TQcS}4)%KU{;
z-9ock=NWO$AE`)ICUQHf$nHTIL5A~LrP%9`B$Hz8JQ2k~B&RsWYD>(C3m-htCSURq
z*{NV<hLl`i!sxj>h6Q~G>AmfzTbTwMXw7`_r%2ztgWCRE#eECuaVgX7z2V7Gn)9F3
zU-8HMoi9%9m*IFAG*obco%2Gl{{SchsyP*cit`wtr8!zUP0~bh5|R+7s^}ht8{rLD
zG(sfL-E>bxMiSLe-EwuSZKhurYR#GqD5d8#syG??r8&<Pj%c7xF<x5nTJQ%HiZ1#k
zil`)P9_j4>`=&3&f$E7<o^f4m794pYQ(To~l#}3x*-$B>B`wVqz+;Ms{YJLr(c*=0
zBFQE(qChcT5T!h*uW^bW$H#od6MOq2F|K;AG)jC^$MgJBx&Hu)Ut<%CbRE*NHH>{!
zjDsCg#-N|YI$SMk)8UL&q$H->2ayzvVwNbBab6{kX~+&U=B`RER?#ic$BlE<H`*Un
zIL3Le5t=(A;R4t=j2en)ht(8CBm9OcoUl@hL&j9>1!Gg+72q+(Gg3N$Pc;tnUVeGj
z8&d4nqq~$vaS?YI;+`YgxHRTsNHp@pZZXX-y$SL)G(-@0NLMEm^~Ue2Y@f@6RnI(Y
zbp<ET-)!faP=F5URgc{!YPX&}#Ri{}KuAniiQ|e)403TvoN-Fq?TcSWl#p@_Hb&fW
zP6n962Q-$_>^?<6HsiYRz|KV~5EnFrs2`eT_^Kz9%A}A57mOek4Y=}Xh9hlu{VxvO
zZcoXZG`XRQKIyEy)6*v=n;|*kx|Ft}mDq-wIW)OA=9;FS`2dc7RGYFmrG9CoV0S~S
z3Hh2!D|1$M4)1Jv#Z|WAa%ih5*aIDXCJveB!XI!1Hzd??uuwsznnAa|ML--LPju~o
z^n?~CCZTMnXE~?47S9y}AD8t)!ZKmFbwNqXU<_i9X3qdnQ-ZR7YK)7JqA)n6{0jF~
z;4@VOMa?_9_A%W#cp0cbn{si%uATZ@>yxEA>G!ToEN}@uS1pCG2*nVrKp5h;Y4bd8
z(OlxBxjwzHyNc@FW`aQK!yto1zyM-TIj@_25$o+kLzQ8f+FO64;=5O-y=SCrF{ia~
zsUhu2#dcwyIaQ-9pyFzk2_%l{Mg#=}bN5kONUU<Q1_$WWvOWM+89r(6z@yj$$T&E!
zF7_BDuqh+Peq$e1AuNmW859$uc0EbxXB1VWO*<9F6b#VN;f6`8Ybov0-J|Wu?yAWq
zmk4AA$Wiv-RI3&jKm5~KfyqC}QZ%^$6N(nv2^675aou_)!tXfzSDjAo4|K!1Sy8wK
zt-C-Zm4mQpGg1VL*xaY(^-=8``8;^1BJfW}hmHs;5I*zbp5%d(&*GX<v7F@7q7AS<
z0R-ZY9f0z_>}7F^S7j_PFlk$ZwT20%u;+uCWI_J`1|0WJ!i37yYQt$!-`#i;k?n5H
z3JCa@cp&joZNSSM)NvAc9GX?!Hw<Q_@(MkMbMf&^%L9z&og1=nJk&Bc4UVa#+hj_S
zE&z;^-E!~MUr`XlPJ#IU09FlcJzK3sq-hdLWW%t+kIitMV*T!}9_oHs&TE~k&0iE~
z#SbMGyv)<1C-OB?ROE&@e9@Yv%!Rk@<Hc3nPlpUoHOq1|1V};J2;@^_an27kF}fb$
zIi&zexs|rk?hOPi;3x#<tn6eFz(`;Lswh#JxCM{kRh^!btX`e4z}rufDyijBT(RSh
zQgo2po3<d}=b8#@)+3Y1tyPDhwHa_`liST(!TNJ-fA#MUYb>?6*ON>%d0&H;T}oN9
z&zc;{G2M1i`g5eO4Jm3={W;N?9JwFuR6Tw&<)F&WDp(Vo*7x+8Gj}uNs?^E)W2QL&
z037V6`l~-n`YZm6B%a*6!+q5m^|<E-k)utb$qb52>fff8>b9+Mj!DIB)PlSYYbWS=
zZLIY11g-<-wNc9x;G+U7$LcOw8$GU`mNbbKpFEt?k0&COs8N;XnxO9Iip<koGSSJ`
zCY1>-$>N$%`arJ(v}4T%=#38Dlf_Q}i2ndQ=7wC;kd@A9VJdu!DB344fKd%26R;^G
zH4=_<)jXy~?9(|mEhKuqBdlm$juZ@<^hY3rR6f4H{{W_XPWtP12#n+YDn&UokR2}1
zoc{p&hV``CzP9>pyr=filB5IKt}`PRHD#~ez2>EHaKw`%lT~M$`UZ;v!-+)j-l3>@
zvp3*w4QPEW>DhH@fWhOOP@bIA{{VS-yM|OB`L4g{tta*u(Z99uKZ<=uZ*5X3)8XFj
z^b6?^NW3?hl{SIaMRc1z4fb#p0p_m~-qG!qmM|m~>0NE%EhBAED~*zFZrRs@P*jRb
zjW29uF<gDsEyj}>V(QzmLu=ltj(t#BS-2z~Ks9$|3eRY=M<ygAl{Bs{Jh9Z%GWw;o
zYYUT*K&~O`^21k?B;Xz^=uKMKIy%U>IIc7LkEHt&o0!*(^IJ4iN_?4hv2uq7GBZTN
zXhk@pVQTFoigd5h{YjzJ{{U=X`Ey@YdOGx6>7iM&$}8g^LQZY<_#7U{74=7;uF700
z6OigF%KF|V9Gkw+W9Zo)O+MeFpb`kE!!SH`NTjhObx-3Q8uJZq>79z9GoHpN+r~E3
zY<T3;9&LkxRk`iuhi#HKcm}<KqmxZcGVVRphXq0NO%=Q3PKJgRBAku@sE#v^jYyxz
zjw;x82_Z_HW|dg`bk=+>0j7iT?wf=PvBPN?^I2cji&dY|mw}f%V<c9f5mcY3P9M>_
zf@3GzKm1j=DNQjlCgRVEuJV$)spBWAwa|euwomm`m+V&dVVq%){{S^6pQuRz#&Rp@
zu(?4WI@Cwx(a?P@G?CnFa(138M=S{W2yv5|$NE=Lj^LGZwZ~Pq)1mimnfLWeO&sis
zodVw4I|${3oX6d0^y~L+IOi%t<C+IfmIBWiAmWd{7RJbgV1x|dQ7h=Cnn&D0C7F#6
zq}iYP@%KPr-*jD!k*t|u8v{Jhnq9<~KA~odga(aRRis^d>t()*%MbvOjw^><Gs!b3
z9Ok;$>NUfQRY2St<QELl-Z4Xyn!g&TnsIbU`4S>H!ShyIODgQ=0;>xdCJ!Tjy&Y*F
zk8ur=;;l?_XytWcKl!O`mHpq`Fc~~h4<kH>cNwDWrFJVDbPY2hrhu#3Zj2If%>YXg
zo!G}>rnQyfgoytDiSC7&PW*%26vB99f^u>Bra2&Y!Kmaie-=1ekSDtl3)u5Pu$7vy
zl&A%=wN`+jBq_(;T3@R~hjGp-oXqSthWO{Is$oQz>atscy@6CkkGX&<_<U1o;4rFd
z$eJ0BIyF^=h3O>z`W>g`xMIGI^tpffUAMY^TSa`=r*1cK1vp~E74(;+5<#QdiB4N!
z=Dhy^^#}Y>{{SaP)4OH(8I)kr+u2kD)ejUyA<auEErFh^%_i|DBc;TeM8F-uis+t;
zKlV|Y=Rn}8&2(Qz0&J59s^$8R$?#ha5wcV982Y8C;>l5(kl=S%Wo@=q(d_auQbyeL
zDk|_lnrt~-8eo)cmgNQm14|sEU{T!;?tGeLZ-=cbUlo^h6<EI2#+Y{O3TK*dAgwbz
z*J;Y=p&?1)y^M^~F&O5Qby4!@gH&MiO8CbDyr-J;XB4Z52gtD;3TtJaoziniqb9D6
zsH6`fU;)J_JaJ1R;8RrKW`mnkrcTl0OP(=Ja=93!ZfL3POq4D^G{yiP=|ROta$c*+
zB};&$MRJ%qp|SPN8#vsY_@L0>^ImJz@Tlg$4BLiR5me!Zd8Zw@CYo?LG}D9CW{jL6
zOtP(ukxg`LZUc%=dZ%w~xF>-|GK~0_1g5$g1upJ-r5G#UG}Q)zTY4vCnA|f+dao}D
z-!<daNk*8XL8L6lj8vQj{L*<Ct0RrCAhCgXrB&S6J<_)Zoo>6IzNq2E<&i*=l`AJz
z+Mr^cjwx7j1tA8#W;|*Nk%hYvM>N;Yc&CbMo@&jhAbAmo#-al`sY?#L(<n-<1re|#
z&3K+UrI6;3@HwEkx*_?dNL1@4g;qZ9c^RXw{{YD0m3E2-OolkGqqoWD!nTkD4r#5p
z9mbz#!Q$F`uS~242UTbYUA%xrFexX4QZU?`eAF0xoF1tHd-%2wih|^1?y3I(kSu$q
z$eD=J&2U5pNF%C6;AWEq98!~>DvKELB+};sl}J&HSAplMgd~ZuaB6YaG$Ndw(QAT&
zuExN|@90$a*K^xQ9toWof9*J-->xaB+|{a8&?wtz->ACwniYR>y+_?^bkEj?i~)CS
zT=^on+hpdW9&=VLR!H>3c%sjtWBq8-rXb5~%Zf3-S+pgGYQjnW>*l!+9Mdz!32X8$
z0qmpbORv`5JrDS;9OA1kKU^fqFj}y#9tq7Grdr8&aHyc;%_sA-J`)?-iaQmj>c+pR
zPN{60LmYKoqtXp~eWpcmesLigBDmM-Po>4p^lr(R(*&GXe1%25y2h9RMg}XPr*m<+
zKFVg5(j;%^v<|$|$`R0&VU_4qMckjk?xQ3taavLlghREWA<h_7Zdjh_gDid^{%K5{
z5sI}^V)I0?O#2Ed@_f?7TijI6IjAFJxD4P_p-7v~KIqaRHNr5HmkepRd|`Pf#U|xb
z@;uR+ZRA$-iCqK#0CbNo-WOviHm|@mXaM<eMJX)qGtjASTum{MC<;z+DkJ{@*lCk*
zmMB%64J)5)fwqT}c#(l%2cOM3n51BaWn<nv)DBA!LCq>o>?7Owg1k`kIwWl)oB%RS
zIrkRsu^*})q)jVGYe?Mt@{Dofn>?e79_C0VDN%_1NA((NI-^^o6BK-KeAX&!nDg>E
z80w)e(wCM}Np4B1spNlf?I5jr%$XeJY~GbBBfPo~DIe(-0Wq?laCqXRTXQ3l!-}}k
zy)`}bfVe9c*{UZc6>NAx$<i~nw$jfijx;Vw@GD!RdOqghE8LSB9|pDhHi2iSqKKjn
z{{UojS}S`uaH3oV`mSc9T%fpgj;@>2r8iAmC^U~w>9$HX!@Y-+D@A2#E0B;|g&$y5
z&X=b%y2lN<Aof^R$4=1Wjx4zWht+W9u6bLoPSm|U>C)Z7h=WU!pAO_7syTEPU=G$b
zx`R)(Lz5bl_C-tVsBRPiSzPkVn=gvQsaGZj9S-H>_V!~Gc+g!2S#m$B*qB#%DmkT<
z7d>%RZI}IvvSw0C;*}Z8eIhWSy{z%|P~GTm%w9BNK80ywQmj<<MMR`27~+eJx=gAw
zigwYEk4<EUA~#nRCA_y**dfUL8rB5epMW$y#e{C#nLtxXD9Nr1A(cj4@PKT!w#ZqA
ze>HD!c^u;?z}9~M07PUZ?cDR?s}@_$J&xsb{Z>iG{@fi*ZBOub{{ZCN2IoD~{{WUS
zaasK%T3J*F?yq8Hkv9S>JArE?1cQv!fjRHGj40rPPdVLyY9;m~{Ssk7+B^HCD!^u%
z18(NL(iP#VI;&#S@OwOUT=(_%<i+S&Lga1)S40H}Tz|b|e^)dQ`u>D0k;;!IuG7-V
z89s{gCgRfU_@;19PAUM0>bs<0qJHQ90H-uZ{{WDs$NvDln)Ykebm?|^3HnZ<yQ2f>
zT|6`}%^1rD`3mm(7M8Qy!zZ`2fGU^MKc%mu$b?V1LF5|gHWq7d3a1|$!>HEd$oOW=
zIvlv;O%NuW(7}@#V0y)KFV(MB$D!NFVif!Cybr4FX>g(+Zvwej>YwSa{{V|G#q2We
zlNdbJShM0%xjqAvO_{2S<oVY~)~;do0$#I{S^3GYqCG1uzN>2+q@g4#FhSz61N0{Q
zc7Jfc^IMNcdIM9@6=Rm;a<5$0twyg;kt$^xeHNn|dysAIoV$#2CiC_y4eQMaHjzKL
zoZyfvu`Zi0wiJ*^^FV4|oEMO?NJC^}n#UbXBJRy{(`1sSkBsJsq|Bsa1k{m()+@LA
zlk{T#=TLd<nl(l_#Z$}rGp@pOlxpnb^*uXW$mGw`wF&VP4e3BetpNj_!oH#Ob28jp
zAGH<8G%wR#Qp;10Yk&eN$;Eap1~U!BE7sf!<NCg%9%mhn&q3(gY?xs!>a6JzwmJM#
z6VKH$5DaQ}9Fa)GgWpx=Cv<jFB_n7g)Oq_EuRD?jd2!_NO6utaE;$?z(4`DihR0OW
z%0G=LZOe*!pit+1$8i+w;WM86R7=6Z>Ym#|j%kzfz}JBeO9SGSh$fTzr(`nVfGDM8
zV*@TQJE~t>0N$3=2cKyF0P$A`0jj6gV1M{x{FB^9j6WnvbbLdt{B<jUJURaW=7x!b
z40)n;(f<JFTao_&-hcU_ZPVq9<2Cf058#iSYJcRH`87VBe`Y<|%Xl@n^xmT^p`ml%
zn!)HWys@$&>a;pYy0y4h!6WL7yE(aP&8k~_c#<GVG=86C_a%&saaDG=vovA0;0n6Z
z?_wk6l|E@oh%T)oL}ZL%y3GTo#TA{<P<J~XNUN*ogp7w!59Wo9EhA$&WhROeNwX>I
z>xY)`ASY;EtD9a1{mX6Rn&@7wy|<q6f&g$ktR3(TGd^+86{<gqHo<&R3V$UH*P5}4
zBr*J2f{FI91rIbrmG+5@kaJ9!!`~Z*;nh!S`JHevOgLE8@woXE`$M{Qk+3=FQ&}j#
zg=BUFj4e$pQ9?IK$vspthbjE9Dm#X6;z?>GA|o;o8*<Zkk)Ch`6Q@}<-O@u50!BFJ
zs?{gy?9wP>KPQueS##>~V1GA7u*E6#bL~)xEq-SWQ~(g!71*c!G<mk>Q-M%P`dEr`
zWNcO$daj!P03zk2$@Fqq^6kkybx>+ByDsn#71reaIkk*(?V4kHD$y5fWRJ~JFILm|
zWs65PaqT%+-oiNqu;=Q&nDlI9X!fC-ao9PmG#-=FV~`OL$UWk^C#PUoEfN$wDB`%@
ztF4Bk8d31gx=lO8-ItMp!6KT+=fy|{5Y$nt7sYY468Na2WEYZRKaUmCeH6d;Db6dL
zatH>xN25k;Bl@mxz3q}LkK$`%9}!ANG=P8xKC9Wuu6|v+sHIq>eHu>bTZ%;#kGdin
z<0FcU3^z3+DFY&yPaM*?uIO|V3xY9T&&r2K!4yJ}PDOU~x{r1#F9j2YGtbp7;kOm%
z=wiIMt-S6l5M)z`<GyLX>ZcRO6(Z>}3n9viYW>oY$L72S7n4^>ZfeC`AsC!snn8?n
zUO6Z6NNGISPaI~D(sR42aUhOqQnbP}VAFrqN-}AuAk}V7Bor(1C?^G6@x>ZX6$H!1
zYY(X6_alEr*kw+L8?VJCbI`9br2vt|aIbTtS3^&9jfzk>B>1Mb38t4F5e`F-G~K}D
zQmW>>Lg(Tt#TU2q4lHzM;6CYTs7Bg&#wnz&(4r~(q&;SpahgV?RzU_$6KsW7KXn4+
zu*DZ23M;AUSW}+&;G;aaqs@|euQwu^-PeiZHQ$Tu%+Cr(?!3J6X|33%{?d66u3I#n
z8j#=)DnArxgB;}2V>RdgRpXphYMXWw^JaM)9P%g$GFX)qT!qNN#wd9*tJQrSqtA<A
z-{#uK8KZ8|P<&f8UvGfmnz3RTfyPEE$nz3;0|%OD11COci4ZnO00cq%z8I*`@q)b6
zAURI*c%>)+1RRQ)!4HRARABAMBZ^+g0Y45-F;TaDQ8y|;1k?a?pA|z?#uFTJIj=C|
zih0W%3i7WwG^8U?eyE#Z%rQ}OyKPGgG8_tcK1jzpk^NL;VY;G7-Z7k1ObVWA&mbZ+
z(lBa91fHrn6z3FMArr1isKCz@L0r^3NG7St_7r+0Ea3pjHPC$-qc`}`Q!>a!XFV^b
zndcKa{8y^)f2BU0_Z(l(9u<F%MQGDubC8pgNZR@vL|rdRMI#b0`BsKy!Q+8a!5#(&
zigf&_QaJmryJ>KYz`rD2<C-0qmOo~vknG@4vyw2`6!Cn4lEWm559Xc=gSf6}R+nf1
zHgi)%>_A@4Tl*B3dV$F1iHbq97zUx(tfkm>OB6(*aCoTNXo&;fOiOGYXp3cSfEr+l
zOq;QqoT=IyIQ&;O8W;ShsGH}cZ+gnp`!QeSmv$EmQ6c$oIj5$4Da|nO-0*9rj0BTQ
zkOK~>irjE2IbVBoOa!PUxB`VL2?n)&4xM2o%&Z2{$M;+t)VjC*V^Vo0$x=ukiq?Ln
zdV#L)jfK1a0F8KJ4RgVe$30ggSDW0H)zUR$j@4ssZM~~zVf;^3K+u_G5y#0js5_-X
ziiED(Bh&gC$0zP4ToQ7i*FPkdHEh+78_4Mp`ft<cB5SU^3ffuQ3<Qk=sG@Y8G~5n;
zF^bganiua56?USZ`L8>!*X`rC^m<QE^z}Yv#8+&p()8JG7$i}aKl{F`U#5C;2=>cx
z+uQ1|bgK({IJPW**`jACgL7anfnIwzxiz{@+2~`R96#H@seSokL2RiAFv0*HaZdmN
zoKoB0DMJuos%i-*R1Fg!kBWBcotYz!O}Mlcs15)$>2@OX-BX*#Pl^+Yl--g6{{Wh)
z=2b|!Xl9yhw2NcM-!$maTfnNjd!4c$0MUseugi7;^HxVHYRYoyWddhF8vs<QGllM>
zkm4}vf8LvF+wwult2Z?+4Et_8gyU(XkOSXeRKvW+RgXR@7~~3eyK_tKeUBVlmmHI-
z2RI{&8uAEB>?#VL_@~J$BZg7MIh4qEebTIvn{i8!`*MXzz6xv2Dmc)!qVwJ;9Y0gJ
zx4ZTyLZDWdDIXdF_QETi?pzb*v*pGA08?(8Q{Mi6VfMEPD(+UIm4U_wRgg!x(x(dK
zv_$t>OMAHPB36WsxvX!7bR2G)4~@$n2UN#w=Wynr49m}1saTS^VNpuZn`w%;+rgxs
z*Y^>HZdQ;kM-?P&%HU^;XgYkLNqrC<Em23q0-<{a&vkcoa?-#TfD~1DV8myt(UN;z
zCQ*(qO5}iA!;a~{uxtz(v0w{hG~DeqSGy!x?VwNCHr$g@izA%otfZF<j}+Bq>>4jS
zx=afau7DQAmhTky)1MjLMl&lMjw`|OjGT&}$dcic*+qZs+;vU;gXg~L%m7c-JPZTP
zFS|C$vM8(n0H;1F7QpA4vN=W#o+-hVhB43GMVd^T5mfqQZT!?z7e;|)S3!a*%#ZS5
zoYS``9U5PLPqEYz=xTgaFV!^Q=O+}SCpoVXk&IOhZY*34&JH|NhkW%-&Q1X8mvILe
zpn^QEM+THB139K(oKyKA4C6G*mLojMNcf&<zR6q`Zhopjq;<|I9OUOTWd$z4oA7yw
zJPJT?NHmI0d8J|YX{&&Vl_M2V>k+%tSL6Qx@gM#w%zi4j)nh)KzR&*v!Zb0q<cTFn
z_?KK#HCvU(`p^FWG!g^2Ri3nj>lZ)|{{TpT`JkRA+%aEG!T$gnK5MD}0G40m(E4gq
z3xK?X#%pzH2e_!-aa{M)&TZ~cILOb{b#G2AE)}L2Rvc4LhH~{V9caGVo!wQ9sYA$&
zZTqL{@j|x1!nyv14e9;zW;a&v>Z@%cxI$ZL5qgT<jf2SnD#P<wbANadqcb-=S5MUJ
zCTmFqaWf2ftTbMi)u)xE7E)t83{|+YK`)av;l(L<GWTmKe3s68HB)zln<QqsAoM1?
zFUf6W8T%Bm{+R2M&NoJIe-$UH#Fyl1T5ONO&Ve$Wp|Mg3_OJo?^Icxo^lJYAaA%Y$
zYPFB_Y0!mhm11hI^$aV>{{X|taj0WwB~*<60Ce=XWMR%oJlAo9^mfuPJ=yv5gI1Qm
zrCK_SIV^tZAE{y)XmT7@2WHq8I0KPOF_+vSB5(Mv{{U<HbD-PGfXBPVTfd^TD^^l1
zRQ~{CmHMPSA(n;tXU`s(j#Sju-Y3aF`)jRe>2i|BUN$i{GI7;xq4f5TZsFs#j5q%P
zt5#wQWQ-!lqy4LqtIzz2?xSWcKenpMnKa0l28K-JQ17NnI3pv?XnnOn2wY~JbVCpD
z;2d*UWv7Il^jwohR^vw9&fY4b{{Th`3E<Y<+{Y1C0OqJ~#}3=G_@!D{A&g_`4D%F^
zmintLHX{oAY%t5+8KeOBaydUG=u%|0+p`QD)=ge0!xLz-59L8|9Hn^_1D`aI=Q#0G
zC>g3SwK%5y21=a_Baxcu{)_(r`gCHs!#FLs#%rVcG_UL<#d1AKr!EU%swS;Lz^Cy{
zPaoAdoEqo4yD2=?<v19mIj>Z{Z4mZw4MI;fGvI@YgyR)5E-J=}?0KRs`5n|9FI4DU
zq2jO7PB`+XAWb(Al0MCN3Px;<_@oW>UHgrhX^Y!(Y26IeS@~1RBw~alxnyW?`5H+(
z#VQYuO*9@&Y3??~>7LKsD93oDY*GWqHD2m;8WgZCz{NF)9nulcb>;$>dzFT$_Ti4H
zjCrqqDHvqcQ=4mnk{1~r(lFeFSFws!`V<^g_+h#Maf5?Sz)T$Gmf&IPGC>%xH8^{D
z8u~UTxeC}~NaCX-<=s6OAmH;%VTPodZluu5=zl{VX`_?HIhBA1ITXR&Jk>Oq7bBOd
zkp@TAHY{NAOP{JOMN?w(djNT)BZ~6=X>dynd8dN6U~HPzK0Ao1z~Ye<ssUadSEthA
z_OYn56y>?t#|L&PS$;H(4r$!tv_&pOV^ncS{nC!9j(HT#q>vD&AX5S3s(Bn!2LOXj
zT!dr)03lvD#Wnu`m3WRjq>JT|pPJw=xyj&EHw+7qI;ofrz~-9D2;f)I+C2Cm;PMVC
z&fS%v2BK9%0R?wA-_<_VT*g!msKe-ljsae%V5LZEIyn9$w*r{f`;@S5x)khyQipfm
zPHCbhZ<ctXqK%3ZiiqJ((0ubs5PO)*0l?yf+B1xER+Gp6B2>^ZrqacRNvd)xWJDk+
z;P+lw*yQJma6((Eb`Exd%~mlG$UF{d#epO4q-6!MoDeEDA20_s2oSg{`KHLL$2`;2
z@B!kMMF$&d@kp_fIO?L1jP+9wiN~5?P_l9}_fp7021a<LwYGbUm5oPdnwYAw!i-jn
z(>iGcKHR$=DR#LA&1a!BGj(|dswPwsxZ=Kt(QG5Kv5pwPP@^1HGxXm;TMHz(k$=l4
zJl8;lP5{p}*u#(8hwRHIY@HN=^-)|l)F9Nu3&XV!cIsm%IK?-R755F!GeOTXfy-bI
zx*Ji{Vz+Kvn#lUcUlH2-cJh4R>a25&BWy=+qd_*kWd+By&5%5bH&55CXP7D+eP+4c
zovY7ag_|z?)$}%g#k^&{7#-I){X$Ayt^~JdL02vV1Hq}$vmV^w)d#091go*%J8I=w
zc>%xPy0B_-O$?Yq?Aw4&eAk9cW3fWqOdz*;T<`{J4F^y+(X-<K9w@m+F1(Sl-I<jK
z2Oo75!{Rvv)aG6Ah@4<|nubMH!s9vTG^zcJLRE)gR&Uoj*Znt3kx|FG!x+yscdFhk
zm7C8g8&r2(C)OJDnx3rk7a}ll1!vV}*&1E4+;F#LY}XRaE4+s+=Q%WxLLFOkbK-@S
zx4u=l6lRl)iFN^uc59I-B(0j`9nO($q}mXn*l<4UL#J6ua@&G@R)*7U%&f5};<_KD
z><~t=i1;0v^V<DmS6!X`7M(`Ei2X67JdCX}m5g*g>uYankqxm8ym+X9SO8E41x(Xy
zP{8qCVP%VNcW0u=r#jlHeG<ne!b1wJy^(~BA2{(&x}39yjj9=@GcU|?D!iF@ab~S9
znow$OaLAP8ZUllUzTqe2Nu*=9oYPRL`DdETdzsmc5>`#_W<9v%d8U~OUAV!fjE+Zi
zkDkRFCdA2+_@WG;AOW%{5W_vxp>~X#Nx4cif$c`^hb_ajUfxbC(cHbe41x!m78!BZ
zik{|ml@)eij?G;hzrIY_H0!OrGSWMbbpxs5guEF(;4~}~drTL#H7~fO&hO%URh!h;
zCP?XC9G1EzYr*Vn9Fb17xo0DLV;xW%R5>}oH2(mOekUT8WRhl?>2b+$yDcNRS8|!h
z=7@A%fcI3<Zpk23wWP?=?PJYphE4o4ld97DUnr4XTEle-j#V6vD!Sg>>7?cU8RIo-
zBttIGG?(^^b8g6fxHOZ(rEMW`>L-q(-vjKWj%4>3@ls1B_5_?S(5$wfbv>ou?Jfxl
z4+rkFF%Z}yuu1Vq>}=-PS!{p@08>dm4>Z6@G~htyg9T8#+xX|Ylj5o|%`kF0uK*Nt
zLzF8W-z(iW3_+)(Dx8`?z%gvoI`R%i6({0zPKhy)Kl4rALET;%W1Qlv_@{nG<WdO8
zG~Kx58gM5V?vU+O$7YL-_CdpQa%x6wD9te$hzhs>*PI1AT+_xaE(CRajBK|&;*}T=
z?wF6s8RER(6)ZUvk%ZqV1<nhG$9CDzHRI7Hqessnb5D>mf${F0Y4b^O61i$|N4X&b
zXROqU`$5GGx%2#KH@@r<O$`=x%@xaCgZ{w)j>mM0!+tYExpGJi#YX~2B<1s-s_)=s
zWlz*_Aogv&5t>j`sL7}#hypji`_d-d;2x^g>9cQ^Q!iA<;V_-SMt9@=>7q#RL1Fl)
zN+HhkQA&{&0S`H=m#2NQm+HS!3*0v4fdj#&HN<(_$);Rg6pdnX2m*!|5sZ*IHD~&M
zHRP}TQzP&gjC%;ged2-FvwNpcXBv=XO|(Q8&atvC2sFtq5PY-FDdVBT6~SD+OEYI5
zwfz>ip5|$;=3r!O@kB%VBc!B+y~E@4T~#iK`322KG{mpSl_$k?R(YjLM>1IBDO*Mo
z$Mmby_M{R^u=^E%Z|R*D6C6r|AXTz6mcRy)pk{sGV2?CC>AxU&VEaT%q3K*8g$Vwu
z!%Wj*Y^AJV^^hu6EwCczcau{p;xG?%j&1hBvBo_i(8Fr56)>g`;=G300Sgij_NXoq
zxFNWw!wR319;i7sGKxgYDu8|<59*}1bZp>{?x7Imehn%_1MCe#*(n5>9OZjHDiLwD
z{!8-eq;K)C2zmRjNUGRo9GWi3s9@bG&N-!M+%O}GA_5Z|Y0?~xpyxES8xgJ9SsNhd
znslN`B--75(~uM$WQthk9H|}C3J7iG5^{K{Rx!6NoaT%XX4p);oYZmw#~t%av^Qi=
z8DqgbQ$#-!#GKUVNqmlnVxIVr6-CF{qLN^4iuVRHxZ#wKHAkx$lrc}_d{%<;G!Y2L
z-*rK)TD(uc8B^67Xo;p~Z6#&Da=#jy$;{KqxbvD4RtczlxDCZkrm0(rK8tZ&{crp%
zwjYshsk6;GKz8v;KaNczw%}_o#YbunH)IQ)+}BC;a-Y~lMRTAq_0l~Wb7L5;4y`4`
zi@b1ZMnD{RrOr(&V*pcp^;vQ!7ey<PPH91;CyG<mCnOqbG6!`lV2Wd4V~o+;!?31#
zh`^=-H+N1rJ}M?0@k^FcPR5E1jDsFCN;v9=Ors8XqJjFZ=94G650_}oky9Ew>Xm!u
zo4D`6sRk=%G$TCifq_wib53sYP0kxNW?Ax(BDfrL-FOU+X$i<AnnBei@<9*srxG#E
zM}k_E=jxUcX+8qbqZp=~gI-gL_Icz`Z-fyWo~c(Nyl?@eQgMo9TY=z$>2f%s@qwB(
z*FGp#M&}(0<9db%zj{WTGUABT<C;e~H0bf>yx~g>S24olX_wz35=h2)q(g@4p2{(w
z6p`>wY4*v<ZUw3yTrkGmSAoShBYis=^H$BdHRXtv4c$`0xop&7d7`2K3e};-?qplM
z73GyK2~Pr=rk5a&X^KWj&3b0}Ml;Xdd!bD|(~dUKQe2?W(VA=bPEs*Ukn=&l0*@2L
zMu#Bgq&;;}<O4?jKpNl5yvN0P&pc97FDQrR+7-dbsUs1jS#{6VC8$tHai6MmiUW|x
zjARP>Lp<=YOCHj!oSv#pIG^2Y80Lj!!39ogTMfx{t~?qM?1h`b5D&QWJW#?i%eiw$
z2z}`bF#|Nnl@do8IL{Sr1OYwNEQN9X(#LKj$v6OVX!+3`85oejbwVo1B$!On)$l<c
z0qiq{J8GU#rc7-h)|O{sZS4f_vs8C6kuLQBk2PLI6b_Kc{{R)@kFwETMg@<3*R-$l
zT>wT!Z1m(o5iK4G=;EklStjCA&FSkIE%6etmmHH+7y3N*M7yag_gzy?)8Tj-rpPC(
zQ2LdXyq4_)uHZ)*6r~Z*B>Cv&P%LFgV}nq++SnO2VXRpj8|I0J!#q$<%fA3|S4G-3
zCuDoi*EEa?<kFFbCp9D}(y%}~qCl-4%-l>Fzli3#*Xg&X&E?`;Ngw!Q>yGOo>1`#h
zCyL%R$s}&CpuH)lMW<=;!l(fQlUsDSt?zvzth25Qn+se<0*oq*3b2Htj5*{`@k|1c
zr=O~gq$D4@-0hU~7jZ0Vg^wr2W;MN3LmLorin6+xynA>AS3mWQ+sC<l(My6^P3jG1
zNto>?IqtI;mrpFnS;6PcNi_S5Z>sD0V@QU0Z1-5TQhRD7XF|{<ww5Tp$u_dp)~D^f
zO35$)Rue}M%WnZm$zpltwZh3GwK7X7Ja8+MBBj{vGf@y;VOSLhoL0`p@h1ae>{l%;
z$vKl7S3FkB(zE-BGr+Fyf-_kDLp}>{R$Q{!hywsg{{Y2J>CMntDZl{LHl;DX)mAVv
ziN#;QjTNfIJW<5==9IpGxn-hCtb+$NCB%**3cv;XpzTzm{Hs0smFjpj9R@~eKhDZR
z3cn<y1-X_*Ht59ut$N8;<Nl9rARut$8s;xEIpFbBT85)F-TaqQxo39HO&4!)4l>ol
zo?Y^7n`0VOSS+#^5vOcxW1?7l5UY11cUj*}+MA2fAsIni?GD{#x<zAwn&;~?s7I52
zj|-f*?9uv8#}Xz`mTKu1)~jm*lmKzfRcICw+S~iM%VUhu^4+nEW=3nqYBM>_$)2|c
z8%i)=<aO-m$QT|)3mwBaD#YT3Vsb}yBp`Ve%*ywkg9es3<4lnb*ZUOWPb$4vnSMDQ
zYrufG9nevt?bbF($WgfWP~fQp98nD45Ts}GOmVb~a%woMEXO95F*C8SX}B!M>XKEF
zj`pQy1x7d`qvNv3{)R>hKQu^U!vqmcw{&OT<B?LKS7yn;qXoeyFSJJ(JMcQEvjD=P
zV}>D56xf?Dw9-xyVM)G8<iR-=DssG<bZZ{Ya$CrzxWU3^nzr^;CZwqd{d3hMM4|jw
zi#Bk=l5_AIM>HRHNOeq`+{Yl(qnVY%DWH*xw%7Ahs5z$G`vmaCmJ&MrW!wvS0<3JN
z_aY<BQNhT;?vAl<W>6iD4PS<Q{+Y_v>C%qhE1{mE(+$P1zYqSbeDOkPm;2)O+~L=Q
zn$xL_sRO1Jht;jyPu907#Bv`LbKy!!uO)gNLzHC+F1bdbWm1KM3VO(-am`n0#X%W4
z1W}&q0#4iy6?)Jnt+O1xRoy6rEbIJ5d7PfXs2R}^KV;H=>nct=rv3~(vbXA)JR*I>
zkC#G~SmOar1a3Z9$;CNtO13jaXt5_^OVqP4nHZ@6V>H&0{{V>~R4*uCU!0Dr6Dtvx
z?yUYE72vD+`ToLW_k2h(P{?LIIL$(2koB5}blb8rX#W5k3h*n|=YQN8ZMeBIOV`n&
z7F1R&Nk83JM8(M)aYexn<@p-A7~;A`6U{kt**eIW-HPzIQVNm<4sjD>oKlF?xMRgR
zUA7XFY~ett<eA&EpP@)XvFwk{M-arB-PKF*0c4q!07o?&NO*2@L^xJ$&5DSK%YRiU
zDV_z6Nfg&Ei|%-*Tz`@?4oZ{cSEF5#2=ZS7qd>8(Gsw84iCmuQNYPYpIsU1#DJLpw
z62*B>@a?|pNaM<9AQ~BW1G$GaEOQ*3sl`jORSudB%ek95sKg;3ut62?bjctJ4l`3+
zKwUQ}<WWgJfZcXS6lxrd@leekAA`ksI{}Y0@Bsw5;)Qq(j#C=OIOe@Pi6+$`WOYzH
zd0<t(H6&4na9DvweE@UF02u%ux{u$-UTRs9450z21UQs~j8t|-M5?UIlv<12C~+X7
z;gm!+oDosn`)|n@G;4rgAnxxY3@-wl?&ORv0PdcW#H<t_?wn;%+gIhDsc1Gg+;ToN
z_)BjDcV2A3-!<fn*Pxg=C!#3OES6EVuzNJ{Km%~y^hGt7-5auUDhGLD$YJrQ-F5`5
zH2^)FQxM^aCZd6igDPp^8?FhVb`aR43I;LH-8G;)nf|CnFN|a5MMo<vL^mYSwEF{0
zD#q$0ws;(JdZ-<+-yUic9_9B32BwNKtGT)~Ewx0Xxf-RE7V%Bo?B5fN)UAOT7~s?{
z6$U}eR3QxR`@hP$9Sl`wtIBQUoKe>-?eg8#RbS3XBoWOUCeszHp=tN2<xX)%=<*j(
z#Yq0>4Qfwn=wO4m^GDe>BU}t*RxM<23oL{ABw9QF0HjjJ*lY@cTbh%AL9QB{w{B*Q
zyiqw^u>R|$dObj~j}^_-?%G9kzeo@J80#mfq~@p*{7o;BpQ=VY)1R@As$tG+7FOdY
z5}1m{ybs+W9Cuy=ilx}X@J%p`a%p_hIO931BJDOrcpUXjW0Q<^PC_x&DNuQ*QF4zA
zmrRY16iu@RS{_Bg<nd3p__uskjU$vZe1)1$JQJ}V2Q}^7X1x9PUTzduZC&za5Oc|-
zKg)`G9QdZ6<x#7q6nTb4G*Y?B^G#giiWA#>5gsIl=9Hg~G0itT^HLmQhCe)!j8J^i
z5%Q-FPARM~b3#gzprg(P-YKXE^H0HHk2T;i$C`M-xmYj38)P>X4Cr%^YIgoz(_~P}
zdFrUtVR>=M4+&EP0|t<fmlYuc&3Tgq8uRkOHRFR~k|XRI@{@{w%mYk33M(9wC808R
zRDha|PEKlN>#9iJ>R^%R)Vl#i6~F?cwL!_LTZ7GdFHOU@9{1ZZ<(0Y`dZwtUc*QVx
zT_+WGQ)5V_AO+1UaA^$$mynHB$fR%q>XlFXSD%t7(%ZoV{{T%ef#RNeBBKKo&2m8-
zgG_E|&D|j!Q2RtbH=C@E2=iW!Hj!FJe~|G@vQ;8Nq^YEo9lQgR^<PLl`R{H|Y_S+0
zbjZmJDn>3L5$7P&A%L`@cZ!}!Xslx0Yu;LzBl!|n6uNwi9D8s}0o^%dEW{^9uk8U5
zOa!ltV;}jbw|i3HE(R!>Op%<G8KLC&By5%9k^X`c8$!?(?3$$15JpRM4~n)itV*($
z<Laxm043b#f#R#0B0ZoqNhy(!Ip9{_&R=!`@dOVdxw>zRZViE&*m@%8-k77w*MnJ_
zY^yAIvrlh1Wx$tej}#2oC9X2^WCO)r3s|FxotKX5KX}MyX4`-|6qm`7q{`1%>G9lI
z8?C@Hio!73Ijt_Px~=@J96^L$1y#O>sT>0ff5ki*vlkJj%DfX-+6|f7Byo~@rPIAP
zeDk21BB}O8Z#0b(-WehH6yZ-It&KL2(#sf{kJGIi_K=I&ljPlIxAEL?F~v~WUD>1w
z5O9hI0*-?6)V9$&{{YQ*u_!2{jFOeRE@P0e$T+J6HsZs6?8}~OKMaani8%h~v2<;-
zv}ZhI(VQDpnG+(tmol7iD--IAc_iNZNv&<%s;Xs`fL`k>>RoCRZc^9=K20WBMiI6Y
z`7l#J!N@)4t2Ja(2~*u^gjt<td=+mME$)>C!k0wH-EwkjQ~W}*8NW!*7U^S|$^dw+
z-1ibm55>zyqmxy7ZkKEdG|_;-@qt$%S(o;f>I$CE)nabMZ8v36l-v-+ZACrSi%!%Q
z3w^LJBzIZueqn5jXqX4iDc+o#SyhfQ6jsP<awu&=w#}EP1Iu~7PR})@B($Rjv!0Z>
zib+mbZ(cE4m;w@5^<7OPtV#Wi6h88JtZd}DxnL0&JlD*Bs(z`HU)JQ<=h%#pqPiFB
zzpa+u+y04TC&-VP2Dl5##mOf%&(~@<J*&aD6e(EdW1bBaXDKcangFoPSZI*`>r;X?
z{{Y-GT>aEsbstGtm~`Z0+?CUPA*3U;O(NvxvAPYqtk6Qhlu#?KXlWc!q>8^BaBI(c
zrd}xYKADZ$YqI1-B8W!A9C1|_C{kn32Cl9hUI4?qRa+pFEuWQn%)GissL(eTY-ZH<
zM$?w_UW>dMLc4hfn)H2=PBB@O8gzE9(DKYU&S|BNKQ!!oOn~$CObiIf6dGtLC&?sE
zH-hW9eN>VJ%P<(vLZFK|-H$Z+B;5GM2%?_=lr8W>D>r>&p;-sHx}}(w2A~PY=9Thj
zhX^E+4sndrS)cfdNP`{@c&Xz+8wbru#-9LkuFoQo7HOxjK;%@8p%n5?Y9vw)PH{p~
z=BP1TU4|s3R51QuRFXTe-M0pWEWz>t#S!nb0lOF!T&Z$bwx0-M%2}{`sR1-B{1D6H
zrHz_J%~u(wStxlzHhs;)rb!D7d8pWBc7e@8vNUo4L8})at1M&AKQ%N@^1<v-kjP2T
ziX{mLAX7);yEA2m?*+D<GQYuxBh3lwt06U)xP#`0G}s;0fl>X{s(+dSrF$N0WGf_T
zoIOH#GAxlXHdiw?FiM{_eQM>^R!;uvw)WgxTsQ+6H8c;o09N@{>^HWV$JDsFxkYwc
z7@1fCI5hcSMFRu6u=M@3*Aah`GI+*Kds48twU9FpkAqqsI#y&^7132Ku>>m8MQ%}v
zW11nDv-Sl%!0j16DK?L|V+9NRMMv!<tO)FODWPWES3C-u5wT7?J(@HI&m>|(Q9u}_
z7UT~c)N*b?@oZ+D8{}o%kw<_#B8?R!C50o*t1cAOuFJP4%}22a+%cK~vUs*ihkD_>
zP}8%-$2^LYdjbkz{nE`M1SUn#6nqQhg(UY^ikcA^f0FT0M<6H)CMhM5gSW}?L#{^e
zgU(AzRCSt~G+uMXMwwOJjGFfIu!rEDjTC`nol7tSgP!V0<7kOua%u}=?Bqyz@lKf}
zD#Q{+5Ppf&qaiE+rFOPxi5xhp^;sN$EMlf86@NZzCPsuu2a<EmNRzHvQAGtTQg7WB
zNcyFWnAEcapQ=>aV>Q7e&F#K%-Axt3g*%ROP|LPd$f(vJtGJ#jEAUP$k<MF|12qy&
zxk-L8QwdcUBzI9fYp_VY6V(C}+dgy07^tobf~7$?uPxFl?E?)?TY@qQ=QP#|<d1vF
zFbkZIG)aYuoVO;R_kFm_1gBfvdy$izAI%yAA_8$Y$B+#fAu_OXyKpdQIOTo$H7t_8
z?Vu5#x)Dx-`y6dd&G7zImiFw9cb|}=Y+{j^5Zh0hDorj|%PaDH)2Lq}IoAw=SwxY5
z{NsueSjv3C_|%hG-b0ke;C$DFQY>EFfaCK+$WvYkVVr*1Nj(~qB3QvZu4=kS8CVo2
zA5>MoLSeW+RJUZWqGS;(9ixw`Sfox)a6zvlC>g~LjEf;Btn*NTjNp(|=9}+@ZccGX
zml3GKC?JaRh~!hoPvWB@Cx<Sjhh8hiq(k2yRT@dLHg$?se4m-gG%PeeIUV?~6#`Ct
zRMDaYC3~nC^M~T2M6wIz1%JdjJ=G4RiwcnZlUf#41h+V<U167DX2CuxSCS{-=bFIx
zH3b~tHC)>8sytP0wCcBEfB@!?vwg_9th$bINM-q=$pAROqESKP#SIzyqaq&=Ymo?}
zOC1_yjD6Qf^s9xijeOTVkB1e|Jv6zrLtOs=QjDUO(ngu3Q=D^4o@uF!3{rveu5_aA
z%7tbWpn@r{ity@!RLMetlbS~aidfIeq$0H9!x_X@>5)dJoT|W+Pu=xi+&iss)4?vq
za%57*Ul};B6eActQ_ItLQE<iEVNH^v=el_uR53r#6*(MNS4`n7d=yPI#8FKuGn~`U
z98wPI{jgM+mc}~#d#^44`=?3Q6pA>i<+pzUS3?{O<Wl2~tIflAO2p!-<CYPi9MX}^
zDw;<ARcew^2b0V`xTM+Vy7L@T1Ch-YkzEpq<x~Ei>&Tc-k>t~f`C^=Up&4C-5!Y*1
zoaFOQoDwrp4C9{bfvl9Vwv+7DkDf&yMKFMP@lGd-L7Zn5%}!GCS<s53r5sa~lkqj+
zKosak=gnV+E>cN8Sh942qFt&5Baf9iO*PNRSFVgyV3IIhv(42o9DZr!@k|1XT#)k;
zd{UA&{%Nh<As%R3<TQ9wTyxzjnnv!Yf(T9pG!suC5lp7#K1d-v^GMjmBaxc(9w=Se
zDE#1>c_ce|2s~5Sm)i=YlSnqZ7{LDkdh)>IB|cd0zLI(IlVl(#gHVPGhEjKVsoeXW
zo(GDEgS%+vqak~wGRpw~<&SxyL|)Wwf!CU((&dsl!l?6Cj$TZThnluLt_V5Q&uN_D
z#Rm5!6h>5iX1ru`mEFLoK|-f^$<K8e2g{X=G@Pq-LS3jSmF00oVG3>EyX)~n%1Frn
z08J#iAyYdud{i7&m#1z`rQjrx51O*I(~?4j4%akZiKSb~e#tn%`mA@!RPrW`BWk|>
zp0URHkoXiev@Y_6VY!V&)=Lf7wQ0c5KZ?;<TE`Wx=3%&Ye&~M8vR_3?TmIhX1#(-V
zL`yhH0u)^T09027>81lJy;6&4r(-OO@+P1aqEFu1EvWa9WFG>GvTHku)QLt9jL;L>
zs-XboPjyQYy`HAOG7>}a=Ac(7-bn;PQL_v&_f3)*mz8iE`>NYVboo5HiQrJu-L0d@
z_W-h!-4xpCkh_hhKv-NH4;iZ%HB%LtdzavKnxfI}t*<g;V!-!Q9>3N7khYfm9=NU8
zUT8Gs&=hU(8R{QWPjk5;Vk6ECNv6Xe?DtGb-aD+_l9?cmHW@53)uaW7r=1WT>Nu)(
zIh^Fje6p-e%XZM?a)ZIE)t5GCk*-+xRN4xebjh8UG7Q!4rozc?J;cc#YcQ;Z)z%%%
zc9GGe><Nm{x&gh1ENFZ9ntR2KRO|<VQr)bv+lUbk4+rs6z_rOyEw}Cs%glE$=+WMf
zlG$wM85@RkM4dfS&u%irb6ML^)*9J`ct-4f(ESbCFKnmwCuw3OPIJfAXgx#f%WWUi
z4|>iFjJQ#n&EHy1jI9vD$ZHw;p<{T~N(9BDh-9s5!>V|o;#rMc`8nUHHA%HiPFuN{
zv34Y9#a1ynq<rTeRMK)QBX)EL<W{Hj=HK7f6^O`QYbH6agZfVnWtLocq;8WM3fa~)
zG)1`H>y;-0x|W>)O)Vo(IT;m+^sSOy+oCwZJcC=CLSs=FJSfF^EpA6Ft$01Rr)lD_
zCch*?CurMia=lYTpgCn6Qe7|^U_95YnLW1NxhJDsu2ZJZM+ir5<c5WLTM8@Ap?(B{
z-->K*!I^Wx^Gadb+mz=uTlsKlkHkfv+eVLWbLOLrvYz{?Wh=J;e7@-dE1)C!QSv&f
zx(JJNgSV0fd7)XEq-H!CX_?TJ-Glc?4i}T%S1af-jXNb6*iJK=Y-me;SCe={@udvF
zb3@4;s~o4Cj>Slae<FifS9=9)kTZ(Q-TtQN+FjI^cQ&WoQ-VmXcyzgQekkW^{Yws;
z@^;O+1g{;_UMARbYL7tm_og*qBT3dNn}7{yBxB%)L&zeRE|(@B%$i?Q#i@H>M;nxY
zp+Lv%(90tNN17#r0g=cw#YoWQ$0oDM-6N$6R?kI-Odjf3hqsy;+{YjzkZ6*3AbPG8
zts$hJ{tibc#YZf8aM{f}c-w$EsbLw-HFH9zw~->?awwE=69c*uS;TM1sbeLSKSGSJ
zph`BZwbR*fGv=(7f}9GYiGXDw)yyDC7+`QkZqvRGJL(v=%F0c4EXll_W~=R)7GOt;
zvh^+_K_>1z)m@xxD-1R(UY*^ek*I@7NZ$HuTZlp-U=)fkUcZXw6^R%UYcZnf&2c8c
z`#>Gkw<+w*(H7m0Ij)n*EtJvA_T#?L$*?2}J0DbWf!5RAGJJlhJ9yB@OB@Pxtag<Q
zN&{4)<}ffL1&(Q+<1RrQebG=tC13H4$Lg6Pd<<oNW6cU-v<OLc<2CQw$-@)J;+!N{
z&ejz1F3*q|S_uWS&$2_bWGEl`rBE@jkN2qMmnpa|4MqEa7!p6)i=ZUQCS8m0HSDVu
z+}WfVkYsQw8<?ADD%2Ya3qYXZMGrD&efg--<SVp)y%LOoVg6MK${7UPoS^UXODwUp
zp4*Yf)d?z<oRvM)zqj`BBXUhkB_WbssvVR>H-XSXJ0_r(IGksrQJ_d1K5|Dj6Lv<E
zL2{$39jb6Uq2A<#Aoo(6_S$d(4c!jttunqoIp&M~0e;8Y&gyqy{{U3knc4RNQ#A0F
zAy5D~q>@;X6+y{w{8XnzzHEaoPaF!Bqu&@^{NjhWG6mkJBBB$Jq4R_FL!rAQmEJV=
zNX|UeuzceHpZ1{yl1xd(ISWUSTl%5fB~dii=LPZxDr*@bF%^@s(W?Idc1ar<>ZK6M
z6gV_f<PBQnum&cKf^aA#G5ex0dpP2o0W!+5o(VWLa=J`V>F+;m8-q-h@_^c1f{R%N
z-?SGv!K=HCJtek|K%hGJL+O$)@1ov+mldXp-3_#ab~zPfK1rgijm4t7PSe%b1and(
zOt%If_^Grx_sHl*2sQ0mDH8fQq6G8KDsD;}kT$nHtZ9$hlWeP%ZUHp68bnjVLZ6r7
zh`n!mB}N#0WK<Bzac?Ra0SnQix+{h*#?`LO-Sw3AvWzbL9;iFUkbxjwq;@N9bg{`g
z<whH`LF!sM%NuN8mNGvyn_o0KomH6>o<_z2#Z1bqxI7a>-A8$AF-M4~6pM0(E!p{x
z6<KfiB!ylk!5IdQw;@|ALfE;T@Z)dhliYZ&SwE2QYIIKc4`{>Xfu_O&$TQD4ra|s7
zJkb##_D_}vIH^_(qE`61T+~+p798fGFC(@<#Y)jMcx>i}3?w%V#=*sBHJqzpHh&d=
zb-cvPwMTUp=-G+q%_|xv6V$<yc-VGL9j3c7DLrDM^+ETpRZ%vD6zwZ;RnIj}p?|W;
z8{CViMmZ;nGA@AzgiJB(6hd);D~lctA#t2KgIxpDB#Tu1*FS>>MXrVE(i+h?u6L@h
zifA!3QT8dv`Qn=y_=<P>xvU9PQBxJq9MS{EDO_=0P$=w{;$H%DoQipq<BDJ;oKmpB
zt05;9aRW*ADxN94lZsGCTw<DdG+f;(B14byq#aT>RNiS^sf8oJ;L&lm<?e*#jY%7A
zIIS8i&m8IW#K{{$10QtKd8ZC(xaPZ(O{8W*mH}xOo~z6_uO|aEDY<e_0v=5%&fL>m
z`BRwTC`ZW#siw9wkxHi`yxeh1+(ID(9oL0;<83v1sGTsLKgyMX#UlgWDGqU(bm=|;
z*r9TV%?RLz#T1#_-9b2E$rXdtv2MbomqoaxQlW>u8um!zG|+QOPHTf2R_xHv5&5F6
z*KB$Fp^<kmqc8%2n(TUB6z1}HMH!{V9;UZ-@5LcC+rA9K<BaB-Ii)1xkONKIAdGpW
zBO|(312yB-1<iy-;+W*-oWFF(<kD`ap!pSv3rrk}a}kVFRDNpB*1$EpuOH)HW6d@C
zqZd|2esbKQjj+AaLn9JK;HMZh1X3|<$i#~CC?Dj{X1<bn;qR3ssRo*44naI0#e3XU
z<SuEC$=`qh@lwbYYa%Sl+qyM>8~cm7IbS%epxe6wuXOoYZH5j3`l}%>2|)#d%5b#}
z+IwyfA1@UvM$9*c6ccY*H@ofwiY<_eV#_GU0MjlQoO7S*y{JsY4U53@Ow!_c=GsO@
zQ#o)+I#y^jq?yEPo^i!nI!Lp`c5NfG_f%R;lEbScY}rt7Gg{3|v6d_VGm6b-Nh)DB
z7Qm}Y#AJ$5FUUs3g{6VLr1@ZyDIsK*@fZQMM<StES&{?_ILDDoB(b02*VRX;T6>^6
z;RoV}woS@Oa5nMXL3t*+y^Go?{ywWM>M7Yo45WY?x;Exu_a+n>8RXSxsfcKyRObhp
z30Uy_jb@WSxu+)HNgbNATX=6Rc1%|ys`P}7G`IqjzW|X$80Jm#jQ#}{>57XhTXfUl
zVH8>8uqz#_S~cFP)5;Fj<O<Yj`hhuFNf_)k66s-b-6&(gtuIwK8e=ImXT3Op&c(pP
zbPtNrUInwTlV;vVd7<XgRwwr%d^dn9zgxd}?3y|8kUsmYu2uxO0raiIOD)oHROhO?
z(QXJY2^b*oD+B3$Y})D*s6thV99G9jmBg)cu$BDL@EInH_^#P+2r9V8bhlG7$S}%7
z8n2dF)=zmN-mOoU{_#|(_MLoC1(p*uk;G2nf;`p-SHGBR3PA_uiq*^enN&bqBo6B}
zrRgoJYcj<n6exJ6gdF8j3{keXk?%B^$mP^#sJ%xS)NEedt{jXSx6q`MO`WZ08CBpD
zNj<;rHE;2N5zmU7E?YwfPm`0kq1W6c*z#&Y=&L|EEnSA|)9~XV`KtLec%wUs&plS3
z;>mbA3nWfj-ix%x6^&^<7orU%8C32KTDyoX2gBL}9z}Vfw}>aPl23J;QHD^1l^0=g
zl#(_6l98f#mPI>pTG>jtE})9c`alWUcen8spq^g~*Hz&4GJBh6(sVF~w-p}J8#J+y
z0q&xR4|k$PI#r023VA)$s1;a_u~{DG_&RZ@qtYt2HmRn`*BJx~YFuH;fss?4k`3K)
zLD3Uwf=BXfF~v&AsBxYRM&ohd09Kqq_Wj)DYP#}`7C@*T;D<OKDIHiIImI?NGBG&l
z)TE8W^HulA{H98*b_%HVCZ;s|R+A$qfGXLzLTeZ675(hdMi}5QBvsm6UhY*EV%KN(
zFu9{c`h6VQfs*E6PvFw}qxAm(N!FxkB)3&^c_3D&^#0NtZ6kXOdw_FWQ>58V3bnkW
zaTz(UV;m|GN+-@T>LrsSaz16&ze!#U%cV-u$efDPexF=x7O@>ORabUS70;UN*LoC+
zG8IVtRRr2vjYtn~1b4+%n+nRdvGm%XVq4->qboPJoDosnrq&rdf%>Ljk84tJI;EJg
zjBT$esDgyrUJs(^@;tbsKe89b0qtm%{{Yj@c*O$Q#?o;`zzfHMaf-=J^lq06k;*^S
z^;1T~{{U3O8ARvJIvxfagHOp^nxU6g;z6EsOKFu&tW6NO+n#ewb@tde=9*BxY!S)E
z)=eH1{TjEhkQCU&VyPpO-IF*JWu&`i0~xC@%Wnn^ZY|J%WL~>b4xWWuIjsGJ(kNVm
zj8*Th$&$uCIO3`;Oky~}JONz{AM(nNKdDRjvA*q>dVTD$E;898#TOZQGVF4HHAs_f
zm@wsft5~N2Mk{-h=;ZA|B}oGZ6)b3*2xCKUVUy<+p6|edepD=!S?Sjn)@ts=nGf+4
zIhf5mNIcMSt1@pOQs!9l%0515?sY=#S7oAU(n}w@pd8nwwVKx1m6Q<Q_^S;z`Z;V>
zV<0dX=8ZPyTT&e4n$WST;Lh?i`7zR?{{X0f1vwHup@QW8Y5JIWW-&;-=a1^5-Tp8E
zz^w0vX-?3r`&g+M6xNjwz*A@1V##B=nH)kOB$V)Ib`#Pu!60K554^w?+Hic)l1py?
z02S*U&~iw*2nPerMnvqEH7o&c>9f3ng5;4*X~zRSQk$s2+l-oo9FV)V@P0KUkw(V^
z_@Nr=Ncd28pA^-(EN}<PhXLf3V+zFy<HbtJ9B_e?yi)?XK*V!H+{oLMbGXs)21wC@
zrGX-u1c-htMn_d1$0rRTABt?Sg&>B(6cGZq?8ttPR4lTFC*nBqL(0>?MB#kXT0~GA
zc1<7A0;0GJ5hP-xd!=^cngV0GKw3;G`l%!KfG(^FADW-w77#9dqDewj`J!!ah#Oau
z-BoeCj##z~X03fQr?m5l2+D&-DcurHS0Pr`lRIzNRfjdBk8Iil$GhYknz5TtX&Vrz
z8)M0+VA7xVte~Hpb57vm=?U&eNm*SLvcT5zo<?}BHNC_r!V+*@X{n=Op^f<hp}n*A
zwLzZ)ibSP}T@5KiE2|k`7CXtK1MKFkbZdz29g<ZWl<}G;Pt)RMX%F%vx{ezoZFCq6
z0%_DG8h$LQQ-rN~A=h9jJaWdp9&tuo!nbw`!9Z+L$r?)7b;$hF<cVTV2Om{>#M^Jf
zGrY8t@{F@ZFavU)DBD?OLc7!Ac&o;WCq+94J=0p<40|@x$4@BonDW#{N}4j>qOn_!
z0&;pFRwnAu*<5XO^YvY3dr(jg{;I22^!!(FFd%Nn29kK>-O%vM6sYHs+@t4i1}a6B
zNIS4kf+`(HNw~4Oc*r4h-6gC_a0~PGRJ$}P30Vm9ijYaPWmuX-h+vaT97*@)feGR{
z89me9Q|%DMSD2!Yk2t6&+TZG_&^rQ^rC`gugVj`DGr)v%ky;6sHHaQFRC=Z#k2IA-
zregg~G?GdgS7<y}w4&Qo=aEzTqhi9N&T1_jC$<i9YP|OFu&UgP=vc7iVx~nsnuZR(
z=%hHVM%!GMCQAYN3g~{9IkQUVk&(DpL-fXfXB~1qSMWu#G_l|TPA8gbe-Tatn#F$w
zMHR{CBNTyw;}oniDF@XQ-r{%{`ma7n9MgH+Y0L;*nt#eHhq%T$rkn#_1e47n?*f)m
z;V`5)^-0%stPbAkwI2i!HVrtD#W98(HR47)q*)r_hO9Ct%GZN@sC#z9V9^{7Ytd<Q
zys%H`$vEw2nDIy(3!G9p7_UdjtJJ<?R|IP1+mTW?RD=NhQ=6>N+i8ZxUIjI|G~jd9
zHI5piszedLjUpZ?TdMMPiiv!Q;A^u3j}(kZ6zMP+aY%U-Id{lLp(OK5a=A_`$%9Y8
zD}zQ@{mf1;$S$3LfGInvBpX<OIHoTr738Oh!do9^)hkv_Zn-(44^-3}R1T@+abBxM
zhuXrIlNL#Cm5c)>n!^oGQ;K7rYWGws6!l1>igErl)jV@e{lpsi-AjYEl1Ao~9IZ&z
zvJF~m{{Sj-(^wqTeFb(SFBHY=nvwVw8uQ&6`4hm_9MToYr#F4qpZL*9as%^nD0dH;
zz@&9MSObtLZb6S9EUiJmY2A~w*V1hB;-LW!Pc+a@2|ZIl1&PiDJ~j$SBRQ#HJfkCU
zt35Vx?U9~*Ra}Q?!XEplSjbjil}Nrw%Llq)AyJSIbdnRW^Mg>%Hx27hJ;Me3{%M;D
zH@MqMnzQuC{{S}TI0mSMj>=9&TKakZU(3%TtDE3VpNLkcFo#!0R>E%XwX<DCb8QKV
zP)HorT=q8iYP-`UjtHm*n=Py@?YSq%;<NOq&gsx~)$ETt!4ndEnkf==P*w5wL+CqW
zv@LL;%bqh<Ghx6df%v1)tuBb=y1n*_2FR-CH-jf}6jjyiDB?A4N173JJ9#byK$9kM
zLdDo6k!6Gv+581pK=GP&xlEY*GH4xo}eQ)5U=xF<O18nO*1$rlWCdZ@O60z_DW
zNdBg{7-1eU;}zoZ60s_&B>hn@rrS0V$W}G3F~KTJvV3onH`T50aPtppL)jH95;!5!
zU_j{RoP9XXgv7h4`fsK0+FlE7&r~I(=YiZ4L20PT&m^38SdUg}ZtP->iDG%J_PM4@
zV`0$TM`F3z)80$6(hT!ey)#{n2qnuS^o(!&<=mUHdE&anQNbOEP@{1i@mMViC4*B&
z;3%#1GRH6_G1pZ@l@;XtfxmLlzp|KaD7y<mDm=4D6!(fAJ4p@6ae#j`RQC5<Tm}Ov
z#wjm>X_c=rTe4wYz+`5qtZ$~V^-8`zGByoaY14Z?<Z_>$Pjv#r6qoAf4CS~q_~zj?
zBpAB8IvwVV7NZn*5kI!<Gn#Flj<$|WkG+RRuk={r{{W{zg#5yab?v`o0BgIC7B?;R
z$koA{y<!O?j8$FZmtGEP?^20<wkLyBF}@}|WqPY*?0E13*;027DHlHA`X-8<Go0g^
z34;UY0<dc%y9|w39ZefgN~4(+MfF;lBN6QmD=+CONJ#b^HrmpoyD1IH{MXAfWLxB@
z`ks<9c;M6GhFqXwm0U3B&}$<Mg5M)YOz!wmfr<*?E^?#iRLhmY(8l~R-{guQNXTzB
zBuwTq3uB6i%F;w3IO3FTSn-p(k^YNSwaCI4sS1Dp0JQ6Ma-|<6c&E77fPy*hy$cK;
z2|g(zt@2sB<G{v@i14QeH8HjjMza^(PisS&BuRdF%@OYh#^J!FjVrLp%ZekokpxV0
zfr`rEX4C?XepP>}WzeKR)kGRy+HRQ-%duK?xg(1raxrUgJlI@UX5QCPhT0e{gQ&pd
zX0GjZSjYfBKZ9HsT+=;L*T(ABW*G6FDk~pSdb3THQt}3H4`5fMhO-p^02v-1C#B|t
zsYRbiTWYSOce}vwXhyyhOL2Diz#JOonm4SayaFbT(nG<`Yh}FyOZJt<SaVj(GK?Ir
zqdatZ9$4dRvcBfvOLp(%wGv!90sMHOEUkkva&kMVq+qSaFe}Do%BMDcCr^n(8jA3H
z+lX?_ijfY*$NW&a4&Od1eZ)tAGAg(#?9n90w5(Wj)k+Jw-St9&g$9WXHVvnrG+JPE
zx+h6I6N+TY&JG%&NkEW-38|G?hT)JZ&iE>=to5j9{{WYCeY=eR034bf6Orfch(oo&
zQQaD+!A5twAU?IZAF+xx9JN;4MTpaJ$skmotUIqQ;wFcW-GuYV#c$F!+idf`r5M8;
z6nxt(BSqW&RlbnWG7&Jr@M@OHgYx;!S;Gi*;E-#epOcFIi#Jsbl^}{5WL)m1rsYp<
zn~%EmjkPwDkZJ)7?*o8>obtM^%y&#h3Y&=nkt7GR^6{D$8YELJXbNNjRyI1^(XdxL
zmVh@VC`@hv2B3=`=Jo#oig?Y%@FR+sXfsij?ruHZydJ8y=**A7`2A6jIb+DiJW$fe
z(r_0zp)NxeNQNho<OKCaP7I6kJOR}WE0h2M{nKQ)c`&JfM|3;{SlMt@xC5$tfN{2<
zM0RhHs2W#Ui0Xy#8ss8aPT*IJ)2-qPOOd#Wh|2AN&4E%#yBBh~KXg(=ZH{InfxypI
z6p}?8zsD>EGAG^+?0`C_`$|{b+DWJZ$#U+y#{g29q7jL4xxT6vOng%~Zt4RBnJT%X
zbP5)0vhr?p$JGsUaWkKAa-+JK+RV?H2pn)KIb&pnmm>$d9E5$X_S;z_bsJc5RI<+d
z*%Tke8!JSMg51z$cV*WAiam<B<c=SB9nK2@-9YUiW@IOtAudW1tT;W>E?uLC+*tAr
z8VY=m(x8%A)L`VA>pqyXNFj+&ZYvGwt5&*ok(VEe=+^3ExP@_zgPLqyHpI%Vk$+;S
z7H1<r98}F5V$wDP1e%7_f7m;Hnj%gup%tmf#MdPA5VJxm8>?iWPzAchOLhRB5^82p
zE)|c}NAlr;J-ne|MTG4KihD$o9&6I5_eLq`NNi%Cj)k|_su5HSbxfRw1Rkkm-Mb1Z
zGOm0YlAUmg82KQ#omezu{WMa(P%!?gwg{w({{V3x1~qIERf>;1)p%tli!!LE!4^pb
zc^y=7ISylg_M%7t1CB=qkV2+HPh*i*wEPt&yqVupX=^f)7dyQEYoF?tO=ES9r~9tQ
zENdcdal!up=Ci)7(H3nyyebTeDvX$0c8wUbrAeIWBxih+g%osU40s0_s<}kbF$}mM
z;L#9G8!iq*R#RC!q^rX6DI`=>2RSCZWpXz7`=&`0gXBRg_e)kA`x$Zaj~J?}<RwY(
zinDFRWRbS3H50KIsqsOflQ6pT7FSn1=ZakwN=Brd@j~jWEV4+<-yG3eT9t)YHCAu<
zn0%uC)RJ<1(b14<vd?~gUMj{i1#)B2YwRLF<~Xj0>4Tdz70%%PVE${PdSqhGFaZiF
z_0~r<$Ms%di(qKu=u^fik>h}Af7ELfc#36ASq~NE>YFE7@k+VIXs^$ZcoR5m(z_BV
zxyx}+d~6?d(B(R0eW8NAgPhW?2Xut4?r9utSZq>w?!0l1=?d|Y)gqj6MOy5F5tH3`
zug5fphOZX*)n6Cb+@4c|MI!<Q1`bCxEChzeYi5Hs$yeE4SlS41F-o}G!KRVLD(4m5
zi}`eAt`8IPr6=RfG@q3t^-)RFqEM=78yl<3NPm?tgh5yhw29Su%_5#C-AFzUHaAy~
z@usoEQj_r_jAg?E5<ksLQAB>~T#VJY+HpXaWEWuLAXHZCx%2f!%(;xzR>^=(bMxsp
zYU1mH$mFbT5*`jK#PLXfm3fVH@3c}Nra1Wuded7QhG`_N42O2K&?(0lq)Y);(y?rf
z#!Y(|9ao<l>CejJHC$I=7_`<uDped(H*{Yq1oQn<SAkA%<4hT*f0cv4#>q6V{CTet
z=8>{RDphQqpPREVmLRx1d7&~eK|Bg9$`uTIa7_c1Nlx*_eJWfYeD?ZrmjntPSLXw&
zau5h4(pd>4cJDOAm4gGCj(ImX$DvV%`1eA>S}czI1I0rt@<FH}kd)x_NL1%Jr?x@+
zkKHU+Qb8tBh@su8HYD>(`60H@HG?VyX6mn6<~6pEm&tCcnpjODOrZ`ru7hW4w^q<7
zQb`{pRY@~GNTkbcG~3Cw*a85lukT?W1x`8?wwf%kN0BDg{M3&XvSBb8zUX#P+OspN
zTd|19po8~Rwh}GApt8z(HLcg1F?(yq->Su4MKN-A5I*TwWw5HFplK4vJl^z2aA8Mj
z8gRJQKw*?8ky$@Q+{p=O+c@!EL(+CZWBuElvvpf^nD^vQ!YP%hQ6<NuZ6$E<Hpw&9
z4L_r9nfJ83j=8N|Qf0Se<wXZM1&(uFIq31XbQ>wXJ)_HHhxZS=@mFyyXb4mSq@2Pn
zl*Tgu0P3binHOl_)sT!-X$GsKH|o~k0bRTb=NF0=Q?yr5*R9XA<?Ea3!F}=*?x>w7
z!D_lHXGwj)@v#T-S`7;Atz<@&mL79d@Ja>Ph^9aptqzqO7XY&iW5sgwHNDYJR%m_r
zq}wX6=fzDt>j@8%z^^nFt%8{S)g*yQaL%ex*lqACg0+4@WQN{N@+xxl8MF3Zavpon
z)mX+%adNmHbU*FqnM<R5O-08<zD>`hZ$!3gtl+YYcUox?9stUD@+*&N*H>Cq-^Gu0
z(sXTV3)@9eAQI!1uI`^aaz!sA##O@t^%~$|eD_o{BE=BLKXstg7)c`uJatsp!X=M8
zhJLE|@kOhaS4asPv%#pQKjSI@=AR(|aMcmyRaqmxEf&GkG+vLLHiTxwFBPkjV37j3
z&2#+*K?UGuJZ87^NFLQB35CaX^1o8jEE<_Vv+2Hzs`s!(F^wafKblrWDr7B&HRNQH
zExF|NP_nu>SL6!B+1yM200+YqZz=ih6pV_jNn^)fnq+J4N->}6rQkVM;Et-hvdX8}
z*uTt54h?!EA}atn6h8M^@t*#wc*!FK_ep<6t}PyLawTz-Qv9}jM&Ns>T1+WZ^2K@?
zTN^;Z^Gg{u*&J+^U4e1{2kMsF5-H~xsEJl{kxqy({yYjVyI@O-CEU2$%0J?Tyt9gR
zBijYPs-GnEGg5&hINmXu5mm}0tFI<*%ShRJ%*Yp>4O$C%H+y@D6=5y{?#4-`+m((*
zcH*shrTE!bvk%I-9c}&?_742h8VO~5s2ygXFbyGKcr@6gkTC(VS=vdLM|FIAB3OX_
zM=U&4>zrd8VxJ5s&AF+=f<PD*I^_)I;gKLauti7^gMr;WHt!p%Nj?M>6uZ-w)<)lg
z9i>~Uom&gv`%`3%zz%ve*n=0wD%BSfw<RTEMDe>AH0Wl-0!DF6X4<C|9JfBiZpX-~
z=at4(SMBbL0n{08^){1sN*n=GNTxXRCOPVXv$ts(=M92RY4c3-OsdB?uGW$@j^C5W
z`ke1OjCx!J#>{{agB5LUbgvkUE)UqC?1IY!Z(IRSwA&U)AMILS2RU*aGqcGx<ON&Y
zm-zLHUwsOKI~3XO1jRxpH3`W70AS4AH6_KlSjG>?RA|zIbCFB$NE-?_3Y-&>p&gxl
z-T>;REU~ugh1H1%Jf9Rrq%5}~rWr~rus#qS)U3xUPE9N``y(yMH9WA1XJfMqo-5K?
z#kZEreNf~)Q3J{xb=?fxmCGO|k4t3$7VK2;q#;h_0*zQMinM7DI3U!IZzPe1+t1xt
zLmITZm;jy)5pe;CZVn0_sHpG<E=r`dZJ3l}=+ol0=VP8IhL;Obw1+u0DFmEY$NWY=
zH5~Dze+4{KWV{_9Ij?)6hT<@BMOT4&@*xNS7^p5-C;@w@-r<JgKjMd3ve|zX0!Z@i
z=0k!81p#?DR3WfIsC5g88Oc+{S6!0IF!?`K0D58zZzN)#J;~W(fpYD}1qj6l#I{N5
zw_b+PQV0g2aJ!uU003#D8*~Xum4Vp$WA^sO=TbO0$XceEjm_&t93fzjugz=q%hs0Q
z#7_dKf5x(nsqrV8e3b~&CKTlDm)?en0l^qMvMYBKIrpc)!ivp$YS|SG@&5oFoc{oI
zYi)TVX%_|iW5o<jC~{1xx!Nu*Csxz8f<KiXE<bMx&0`JY!)mB9rMjyr^=4fqSf85l
zL)}8lPW&2crc&`gc&X&WG9NXXhf^|G#OIf*n>(bM2;6s`MLaU8Kk`$P1fajcVL8~!
z!k&2{jWXqxlz68|f5pdORkz8WxFiFN^<LZXBpPC|5#_uJY*#V9;0{eXn(_^dbI0z$
zF~J6&=W3|~kSVr}yLjwWSMnI3FZ60|TX!NTw=Ja6hAcx48lFFhK2-Bn^In9uig0`4
zrAws!$F@)NUMNQ_R|7m!e9=A2ZC(%Ny&I(V{PVbA{wRBg&AS7`DX27=S!OYl@qVg%
zi+A}V$8c}x#d^z1HrH)3FTnT5RhSPN&+!|vU0c+V31e9p0;h%_isrtgvKBXc;a`LE
zS@UC2d>XLL$txlE(ZO!N6u-PKJ+Mex7jm>?kSXC=ViFETQ2zi$>b#7)onko0#(>l(
zh{S%#qh$8je7VnxrqqHUW8)M8W3>_*Sw?ZqJ4<b-GIRA&-LB~+C(S=jQX~9TS%2b?
z-cfgFF(;a_M+6L2O{gOkYX;r5$A$D-!lG}Q{{VH;JupeLLQZR&L5<+&&2*njE^JVf
zisbsDdr@RMG*R+JJpTYJQzPh8{MHC_6ju8iA>;8%xyCEcjkMG>AI4mhw0k&eb03X0
z$RuLC$BH|u(qXF=c@);}yoBbwf0rhNzY=*Bv$qxHAP#HU8*xm|4Kis{vQg<dVVXw9
zE5w4?rxFP2l9jd*BOG(pdmuk_gr2FVx>u1Zq#6v)SBi4y6g&=pjSz;t7MCi@lQGUa
zT?xkSd!;=ZLV4o6-Bz37h$8h~ZX2pa1k;W)iV8mzPa-!fOiFnm3TkyiQMuX*=xhG~
zD)VO<q<`g3{{TVFR~zLZu@kD04>c5HBbt+;ioYM@V#~3_=8<#Nc=bpR6>9v2Ja{$c
z#yK>MdFqk56nNwZhA9t!bgBG#q$lNy4}^k;o~fzVG=$(%15D-eK=66M4M=*VC*@8(
zbwPOu)fuKq!&B7L8~IUvA|uXnDT_{SsjJAT{lxG!`>!6W&N9@Da4}M@$bN2KduRqZ
zKXer2NgrH_ou944BB7c<TY-V~UrCoIp8+J-i8$oa&yD<WDY9dX9MwGp@NKQ0#WmO#
zC#vz_WYZmmD`6$KZLE2zS;-VCx63_LyQ$*%0pdwD;5p`#oc=1qOR$peU{qyjvNu6K
zoUv){!DaZ@Nzm+s4voJH`JuEuCKzm0-9n$mUaTmvsSA*5w#->FZeiEBc=txx81Y$$
zm_u|!oPujf6Tu8DC2gyWiot67imm&o;Y)C7e2W!kp(C?+1hWEjS^of17z{u5@qmNQ
zE7sbCh9IPppL9&tqUO+oagoIqiOg9SrC?T2=ep}2ooM6JLY2;XHOjP9lFB!YhdwKN
z==<Y6sezBlYoqDyZ)@yFFOjEK1nm_m+@xTVDi|Ul2kwkTJ(}w|CD8OFYDpP5`^^yK
zL;=XBT*$IUgW3vL&)rpO_el-8Wq+5|Q7O0B=@q`I)V9XSk(%dM^5OUPSk=b2dGF2q
zv&~cnfZj&3o-<hUNhI1D=>^3dtZ*akKI3<qx6`fD0o;Th2a{E=iEc-<fq?3+Z7rl&
z#Md2<;wzZU&ASbY-d5p)pz-%Z>XL#tV;oc5Hbb-R2ccBHq}HwU`(qTRb2n5w3{|pd
z?@W<<EI|X!5G9g06a#>Ms}EzV2atf=6WwTSFPIRy5+NV)Mw%p|(=RupMYdJWdLz1B
zG`*eCkvHQ%@l3Xs9HP19t3wTnn|5>|l!_`&4$u<53kYUO?L?=Rs$EQElh8FSh0rY4
z_}W&Xy$s~!@m>6L_aLNQ5ElrlK3OK18wZh4&GBpo%_wubkxfH|Lnuj<kGkJz`iJhV
zQqh;8*{*ik05JaRY3R$yB?yiNM+Ury^$vlyvd8}bPf_R^^@;3~vf+V%kQ5Q%)NbS2
zEL-P9>Znpfu>5hHP{{|m54g%l)p(g&ZV37w8OJ0RPqW&5;U9KBL++OCi(CgIgY{9$
zBaeV8Xot9&bA<zntvg#;p~WY?5ZuI})s&H*^HQN!LAUwTY^Ge6V}aE=)fA8ChVUxC
z?%@vq03KNrq@Lyp0MdZyP7M-mnWf2YahiR&hz8!g_@MFFebkW-%z$&jsa;tk8+jg0
zD1@rypNTafVeM=jikn!DchJarJF!#Ai-#M|2UQZbIL0bW^O88GPPh@SZ1m9x@R`nh
z(=L?+V_?lm8ksp`{ZhOoGFXr5gN(c`2^N<P@&=@g6GSjln}ETna9eN!r?EpMfh0Kq
zaY~moT{e=xqB!IL5uR#g+NsYZ)N{qMX5(n_UWu~X81qQr?ue$Otq*1ZWDL^DDJljr
zO3VKM0+`@rWag3MQl|U`GFY7L1)C%4l^YTk@#2`NfsZDnRUi+sR@+^by6`HnV!^!i
zReHS2jIs7ftG2g`_NH>!Y;Y?#Gkf<$vhX;~JTbXVM9G^|99ZQqqEQ>L8(V7rDJ8dH
z=uxxzsOFL5J5!o`QZZ4V728QAM~al3owCZvIZ}k<9%$&IUu+zV&=$evj{||4ut<vR
ziAg8TJLMJhK3F!WBw!yjkt|?t2Mj35O0*(3Jy8Duw_Id{%?7#PDe=h1kz8_lZ>oK{
zSc$+UmMCIY!Q}X&Olmp<nkg_gftEqIw%_Wd60t$XY09e7>;Pt#K(G=#j>Qz318I<~
z?4t~)@M;`8%eaC*sn<6tAmfALpKmYtb0F)@3J*duF<ro%)MX_tikW0a;2MrcDZ-uK
znlwUT@Qea#8Fyn1A;*DGH0;L>oQm?2$~RQ7P)aJYVZ3u*k(1lW0|JA-l33YHXN+@E
zE4;iZ?xSI#h4zCP3F4b9*yItk{n9KfBOs3G8E^?!IRc6Wi+MVs$RPg!R2{_Om*bK7
zq`8(ThaeI4Ro64OwUB-_2qWAw$VPKQOw8<LLx4M`ETfk#nvO|Csf0UESRQDi7Cx1*
zeOFdQ+IA~^jdYz-?JRB9*3gDzTpW0;x1^$y!ozZ|C6mvZef;tyEF#N0HW{k;RAXXF
zE^;3XQ%~+)JLG*+^!U};x32=7YzkR-&yhyya5Ihv5^yUXd2)?-qe+)qJ3XD3wm~9R
z<NB-JdKfJ9_%}AuCJ7Z=eF2T^;a=1X)Sjc?do8Ra@S~cww4+}|6x@?Q`l`m!Te2UI
z6%5h83Qka(3srlCDQg;&u;)LD_M?39HMB?xEIF*QT(^BAWLFjYEu<{I!lpu+(CDl^
zrBUAM%wOK3TiIiVJ=T-c4euh5LHSeRO-(P+PNV)d%@o-RyRn*CB?WfomscwVue!&9
zKxubTUv4OQ6|zcEl1DNNbX>lIo<b3`oCWTxw27E%1z7xAuWw<+nQU~mnzWHp{Ym?(
zvZ+E*viub|NYzKRP5{PeT}<ZL*BQvB$#jW#9A$^fh|~fsOnebqt;#JUDqCcmGl*JE
zIpS{6@lF!pK`wa*6g9QfW_hA5_d=xYBoZQnxei5hl(@hD0C+l^F8=`i?9pFL%B6te
zmpQ8YOTr@zoU~_}KHbIMhNmAj7&H!<ZL;vj-^NPN-zl#qXr;DE_$=g}JDkS&!>=FR
zQfs<8UD}XT<v0e5)d91eHs|7VRuRsU#-QWoxvP`iDv?Z<*ry(jOQ>nm+sb>`C`M@&
zln2WFS3}lp6Wn4~D0{3ny9!y!wFXq4>bc|F$}Gp1cMKBcgSgMf`l_v2fJQ;d#Swa0
zz;u2Vsqd~gspRBUOt~4$wh6nOoYdNL<p2hS)E!!VpkqDJHj0QB6;?%9*w@M}Es4)H
zX9Ik3RZtO}XSzNDNdma5j!T(i5gV8Lu9N9^-D4Q8Wp01S7rtwzdQ^|trxnQcR@@a9
zK^kQKB9xwKsCX6Ooz;fYib8ojaz{1cfGJquQ$eM5c0s3;PCv$xyQTjCPc(~$6Us(E
zG@f%xI5gKkDt7p`N)?9`&=31li5qc93C28BX>KAAftvB^kmoeG_*C0u8b0g9^GK&2
zE6v?d-vf?AgONo?$jt}|#Z7F-qHCw=*_3Cs`6$gBR!fhTY0cFmH_M7sk}J7Onlg<{
z*O;8*yuVb|@0v$@L*#M(G{h#HKaDhCQR&$PkOJW5yvK@3JK~hK7p%~WO2Q;{DC>%b
zGINfqT<|ehVryu!T#hy6uXv=b9!kW2<w*Ykz=2+3Ge}J;YS=`FKkB7^=~wQNyP-Gn
z3hY7-DF!LTfGMhT%|y8%#y>tO`Qn<uWKxmE7^le?i`6zmj%n-74|e-_9nxpWv0~iA
z1-eu<Uv!l5#R{#4qEzK_D+i})9$UW4b3<=JIp(~2uQ9=;CyMIDc2LjFSzF<pP=x2c
z>U)BA;83nIGtsZ2^l&I^C?mQaL$u&gAO}zfW4ek&1QFy?O$87ZCY#1<z;lX2!NpR-
zd;L`^u&94D_>VNue36FPMAsJy67)@UzfS1!K?-1Dh^%*`ZA7;_%I61)>iS)m!DM5O
zI;N8{=9e^j+So^dCp1*siVP=RSFR>VP_m80am`cQ>bIWTC`HHOtFtiuM8}r*A157V
zv69$5{-8iOP;p7A>UT|ePzJ%^lUE*@guIGRB-X7qon(I^*vD$K)9I4wur618WOwya
z>8|rg6oOa9Y_D|x0NiOZMpvD@R&PX!A((;*Dse|sg<_W-5^CZaZBRl<A1@S7reyYY
z1yFfo%}aWq#vBfDLuhIjRYJ$J#aE_vl^LH0NP&jY&vaZ-wnHAMTNPEoeu$&}-)%}L
z-J0%6*G7~Nc*PWz83Ve+`jYn<T-KjdxY!#lis#;<xc6jgqH^#$MO4kYQrW<&(;~}z
zkO?5sa(r#s$?;YCG;Jl?73A;cx%!EJX(sqK`fY<pY?5|Zf5lBS&loD-Ahlx~$8V;L
zpPo9b?C(62lNsTURg=k`po^>Y{i&Wt+Nsd{u5Ic~MYSvACJ0f$6|&c?r!Z}97a2IL
zT=s-aK<hQG7+h(x{Bfmcv}hfr83cn`IV~o)w-*seKnJ5yXf~Tdv8$bVOm+wncqf|C
zJaP7$VV5Fk_suCmG8g(HxB7JKmLMvfpjS8a+C;Zw-e<>wT>{bALyf~`E11d>RyNVS
z%eRm&DfeKXIHEL|fVOu7B%1X0`^H9cE4`+M+(@xAKX8m2IO3a}k&#x{a3{Bs-B+>@
zcolZxsAe;dG+vo<*0%tuAQMqWp-$od0GdgV$(B)r_^i6T3sH%}UQHTouMR~u;ON?Y
z>fc!@2Xl@M8zj!q2uJ}%bG-vomdZeZiOH?StfuAxC}E!>zFn@<<kMtumuJ+yH$FW0
zl;hX`0O;vNWMD5Mkx&%x2kM_3WJn3;H1evl^TGPAcQ<EUO>~G+lrGXauSE#=x94aS
z`El+ItCDDg3HHCq@oxg6xjPGzivIv3dnJM5qet9vw*Yyj#>H5u>&;E^ToXw)*CdT6
zLqwrIEONBvF)N10#Q34+P|}89_^EIR`BHCnY_Y{(0XRu#3WO*>_^9r3uZ1Auq6$C-
zTL!$vMleY}X}4}xDupz8_m&5C1w4|`<0P7j;y}tUF;c_0!ZzUMqHWmHRFMc{*Hla_
zpaM7|nQ;L{9F4}Egj+Hgf0H~>P;FBLq?+KC>B};+k&hH3f*W%4PdlD5k=1z*_(&s>
zLx936Dv>N9<y@XI;+o)tj9`lOxj4u)PQ^T)=yO&VDrc=qmiY|gr--0tV@yRGcpvdV
zYM0U2sc8-=pG$)@-L{Ypt*J1d*+#(4M6LiB^WAgX&sE2%dd)1YTzg30A`f-H+D6ix
zt}Cg7O7Si7?VfAYwb9QnZXGZG03`Bk*^+sr7{FEpQlxA?MGibs7VGXGnvD3ZEaalc
zOfbb3<~be(5frS_Zv!7xExcuLth9Q7gaQUBt%HI+?p%@&6)YDIaT$y*&^*#g#GI%H
z`lge!h2uTcAn5@k`2f__X*UI8X^QzL?xd7|izNKKP$4Mx(YFTVQ)ZC98Trq20<$E(
zTRqWWZUM9Y)KsjFG(KqH*d8H*JFf;Gw>yZz@k7Xj9vZxnvm7z%q5%76iZXZfY0WH3
za-Shl%#o^XJ=AK*m{l3g5`M_#pDuQ;IjMc*E3^(rnv6VX#Yo9C+khDdwb`M_S>}~o
z?PHp3XWLd}6&R8@S1rp^Zl&0v!NQs-3}n}mx5OM{s&weTv<w?8d#Z`9aW+PAP~2Uy
zQ;7&4S)!%(2N88FXKJ@KRvoawpN%Y+Mo8N^$m)h>kSIP4L6SW9$-&~UeIuib$$jTi
zK<HC_H|guGUH9xM2stFzO42mTeKyM~pehdP+<5JkPH7eE0J4XCi<WK_RW_}$NRBbf
z9JlvJYFEx~nnoW1`~_7!;#nDp;EewOy-bo_7h!|^m{pVR!nX`KYS`$JTZ_n`L6Dy^
zYZawRzqn6ua(|v{U8AnsDenV(h{D!g{{Swmx^UpoM{I7c<&k;Xc%t;ZHE!-?l27#Y
zRi2W$m1C9VKj}4~^wrc)1hOdy1m?6T!hF9bERJwgiff%H$$KGG$sA^@u5|TXLN#O`
z$LhB_Lad(bj5EohBGaR~zIj-6)g<sbxcLq}RXUf08GEIBw^qvFkPjHG2d3?RX&_zP
zZtAXVEMjZ@qz2pfLu+cbmuMpxC8|dZqW6t0Ldd<cUs<z%({?H3nxoJJw)aWT4c%C3
z)3wE;BmV#;H3gI}@5hQcCfw6O#RVKrEYsiaeKoLq6^xP@Q2U&nty=}(`z?+4Ocqgo
z(ayj@=B>*aWTuSRGOSjam%Vm3@Jd|aaaxOq7Zxf3?Q1!EZ@N>o9t~O~hAU{p9C1{`
zBaEb(qcjxMS}RVH>3Fj3B~Nu_X`~2l3t$D4?yZIz%?<@RWAH}sD(GpVxn^nV=J+eb
zD`q5!ABpClZya)3iI<jBUe<y3OOGo+`c`&yn%(&$sx=toQLX__SlsWDeSRgD1egr*
z&1aJI$1>&c6|sudN}lD6h<jYtBK{c!%;9pN(&lb0NViQ=i;FbZAugjVy)djdsx;#w
zl~M8>*0)7xlF$YO5!Predf2euoRL=E_>?uWRE|%m0|_zm50O<jXWT&?Yw{ibYia7+
z1UA3EKm#WXKI;nlkcLglfS)y<QKo8)ty(BG0P$R5*9SCxs{)EgITQ}Bc^uapyC=FT
z&PCXpeyXg$gojU*S;Pn+;}mQy=gk0;5(a3?c2*ekTw1dBSlc!j9?ol}`b_S!LtMt#
z{DAz|P4t%*30zN9e*~Y|bP=dtg*(kLbAe0Fd#nxoP?R~pjVD5y-<t4YLwe|xc=Jv_
z$0C`YX-Ue^RER3`UXTFU?vZmy#D>AE6q1KTu1_4dit?OsUOB;}LUDpA6uBh`-^+?p
zaJlhFYrthqDwA)3(17Gp8bC2i&T6;H7+n4<$Z**e4bhX>KG^1l(`2~hN-5N~wa6;V
zg*h3g=K_a;vUZxCy01m0&F;f;G0r<(9#SbA8LuefkpBQI){A>Y@;f|J7YCYgriwh@
zB=9l+0F`*;3RmRUi1^iY*hEv4nvprEmZnjFF;?R1kVtomMLsEW#UK@_zDmZp=Dd2P
zZpA77G!$1Pv5S%^Ff&e8;+oymX;PvJ!y=laig6?gK*EcwAjV#4PgJB*6U7v`VHvvv
zCW4)}x#x-^R{&u7p<nS_PgBm^R1-AdkCCE@wmDo-*c%VsI^D{@Cxcit8NHmbza^Pt
zx*;w=Y-XC>SDcRNr14&-chQQE%`+mnz%>+rj!zXFU(YHiiJ3xWhc)zQyGH;QC0OE`
zSox%R6Pkd?OJ~I+nn9st7&)le{Th(I>WqV)1yw2v5L1-k;=S#=9u(8WGJR0pOq6Q<
z4}BC9Y;pndT_!zAngO$#;!p`;aLJHGYP3Ce<~9(_{{R!_naMLYTut(hiz2-CyTb)1
z#%N;+NXt78dZ8@tp=1yHfJQpaMRO;*-wCiN7}{fwp9Jx{NU9ilqkS<9OFjVvgH;z&
zNE06B1}d7<TbD?_GKSu{<W{XNWsZL*!0@-h+ag0Hvy9^lSRZmrYv}<W%0p6U{{X1A
zPYFrmDoE~v(=Jz0)MdCI!#P^hu9PIA;_PSmyDyx)fmb{c%|MJI)S}pWG(r%T?<qeo
z6<1^KNqilT$8})F{%Fq5p{Fy-h|Sj8G33~}p>*U`wTy@3ZfTl@&eS1TX1mTv(H@FF
zQeAvb*1@b*<ibbVYRl9YtkSS8RIsUb$sTJo*=^DX)W+LFyZJp;ucY*~1!OIp^;IGk
zztvLRPsL9)uEJPcLlS=wIj&ww?v<3zR!ywRA_|0HV-*gCZ~MEff$&b~EoSH5nBe*9
zv|g08XzdzP#`?QOi`>Xpe2{B)W#Vjsv<kxAS_GK<bIo-fN)YjF@@pZgTKikL2PV4N
zU|$whY3dZ@3<@{YL7PWMQ;Zr8C4Kl$x+h*GG?Yd-W;n%Bl_iaY3)9~0uB(7^{;JW*
zc`clx30EP%)GIlmLh(%;VfHcSn%d}=G2LHgW)cMMxmfYMlTk%nS$Cs?ZC7aE^G7rV
z&m$E)R>-3<{6M2E(|P$VdM=V*Lq$<Tf2WjC!<98dG;XpS^;>;P1}GIpXLSokjrs1Y
z<-SW1P>a|j`lQ;x<2a%ujQAYXQUV4%(^ZneB?c)sDajS4^!}m#Fn!Jk@)er6b%|Nz
zAOJE+tR?5I9?M$)0Jm(yrwl&p&wA#A9(Z5(9luBQESgF2_`c7hA%AoUBW#M1({bER
z7z*dUhxMJk&_WJV$j?>K{n+mz6T-lu9xLbBwDQK<k@PHDsL4iAd_N%J*;{br)5Nkl
z%eGBMSyT~<WGcnj4y!G`lR8S8wktUj=8I_~FyWWwQ6xFb6f6LwPzsZtsy;bkwE8`g
zRN;BAMy_Kl$0Cr-UP1u;SC-*pl~fOwdZcY`rpp!A3zN}q!3Tr+uOSB@_fp<SlDS|o
z=fI%`Rk=LWbhtr_ZCl}zY+`w*Tgb}f15S*CAi>D<Ni311*aH+4Tu|Guf=S(&<o8iV
z(q{*ePn4?>gHZ;=nDA;-eUdFC6BCo1jD1ru=^+P@YsoN20E~(%Ev1Qd%ATpC6;)+(
z$+U$I&~Uub5h-=x^Yu<hhy-JTX}8x_+I%N6F7h+qRLTvsX342uI~zUH*nnjq8scB9
zKDXO*C7qP}nD9tFRNtzez7U7AvY1IJ;IKVc61%vT=604xEX;CAuKxh1`gPRuFYtL^
zR`rdqW*5(vX}?b7l21~rmL-|8O??}uW=nMp_yAYUpQj90)NuY<zKgOXzzH}Nt6Y+r
zINCPl7iFtV`xZ59fm13KE4A~U=qQdjP=C6yiCLiwjz^lqc512(joFz&J~2{BykS_L
zC}@ZH!jtm*qc+N73`jVl5$;plLlMFAO_hP=k6vmrvh4t4o~cqPN)aLIrGug2q7jzQ
zH0ZX=xZ{8-3?Kk!n)GZUWBI74jR@d}-h)4N=;d{N#dxV)C>$JuGfY-8M97#MXjL+J
z9VEfqAu&vm-vIP$%Evs8K%_|v%eNdFHcAJT-cht<8hkMA!Dr^3a13rjjy~vSox#Wl
ziXY@6+%m@Go=K#HvSEqHH7Z8Z#9xiaplDgvMLP-S#U4Sxm#rHf2Q(CtHqx$h_%$^0
z63PI<Uo~BQDKaw?%}#(Z<WkL$Fep2TA}U8;nryAOY)tMn$zo{GB#3t&sgW52I-=kV
z=B@oJrM9uDBtU<SPFkw6pfY3ST`%;1O(}0B+@lJ7nzI~kR3=I7jhj0?KHpE%7AVnC
zK;+d8<?rt<4Xj5IFK6zowcGP^axCrt0Dv5kRF{6#ZY^QrMjs>fTBj~46l0CMWk#g&
zO)QM80E}Tr*{X04zOj!#GfS%BF5WeNj9(@r%{I;2R8|UiyJ3H-=VfZF>tl@~*<n#-
zB-?QF5E8GVx?4RmD>nXr$kaAA%O%thZOXzj)o-QgQR=$8mmlK;0<+?8c`~@|<aoOM
zzKYVX4EzB_dV26%Y5Pd|$=RU2d1P#o8!Kb^sI+LVCJ7XXMlpk0+iFRD8R{~Fk<ql4
z38l07BBauZ_T-c9hGqS>l#%_4BGicV;zt=3qB49O$*#v~IcvFGRS&8zc50EXGlr}E
z6orOYK5GxI-n)8%(2Rg6WR+v0-86S)EbYvfQ%In8!xeV+M%|)s`K;%qE}Cn(103PF
zt(KtJiAfZg;H4eKnu*F2+KD4f+mcV@Q{%_&SQ;5_RbW-mCZ$F#3s#uQJ`64|$RsvM
z#pG^5@l|^N0QjO9-~+{7S+Oy=&1F4jbjrdt{{VW+sElKcvu2gbc~UgmT%uhvGJ67u
zm9uNbQu<PHCY%w^AaE+u5LqNjyB<q|qKvrq&5>#ceTXFVD?6ZGEL!P_TOqrx7SD3R
zMQ;_3RBK&nB-@4E-BF%yE;2Ofpr;u`+ou<DMtWgDn>b2Jr9ohGM9nVOhGG7yW1L?8
z99E~bu1spzL<tfsDI$OZ_dx0~jjR-8V>M$EsCjLP?~zq^*D8(e5y3bng(NbzW4%dk
zX110jA0iG1-Er+g2a0JUW!gbK*Lc5F-?>J1pZr%e{YTTI4sWfO0l%0YD!h@h>~gLS
zS#bXV_ZV*mt}NY1O{W}Ie^QjKz~emC&YvJLp~s4&EPvZ&>HLv-5$GOh+ke5T$O%81
zv9`(TxKdw6lG4-?0X5V8Br&H(O>+xOf;0DBL(>>Ke0Z)Ws<p9Mb~8$y3UMBDOimP2
zyw)p9D}g+S<BCNYrG7;;aaA<81#~XbPAN|Ukbf@e?!O#^Rw&wS0l%>qkxXypN)Hs)
z?vo?9g?27@uLF}_9Ih#<Hkwr2Yv_lG`QnuIUOYBUdyaEPxgfz_B#Lmyibf)nl!iT=
zj;S%X#xqI|1}QlrqU-hn%G+2GMQ1BO^m(EolPk8mUY(n3aDGa&M)@Qe&M8iFjM576
z?z<7{%)^gV#UyyA6UP+6RzJy57ecpn?3_~@hNo`$;+Fdj4N=WXt&vg0a%xU0%q!ty
z%dt;fSCfiX#@cF}R*xchJpTa7m5D1&W6xFMc*RZ?kVVCLfF_Y^%t<1bjl74*r;*Jy
zK<<^g?wF>de<VaJpA?5U=8*SEJ7EqDETvIxg)3<SOxb=ZvTfKfdGkrmPc(#JSC^JY
zDYn;U<f$R8z~Y&co+-=?N7ZG0j)G%gzyf)xSPW1hwy-o}m1PGt={-AAvqNmVMm*5k
H?H~WyoW3dq

diff --git a/modules/gpu/app/nv_perf_test/main.cpp b/modules/gpu/app/nv_perf_test/main.cpp
deleted file mode 100644
index a0363be12..000000000
--- a/modules/gpu/app/nv_perf_test/main.cpp
+++ /dev/null
@@ -1,486 +0,0 @@
-#include <cstdio>
-#define HAVE_CUDA 1
-#include <opencv2/core.hpp>
-#include <opencv2/gpu.hpp>
-#include <opencv2/highgui.hpp>
-#include <opencv2/video.hpp>
-#include <opencv2/ts.hpp>
-
-static void printOsInfo()
-{
-#if defined _WIN32
-#   if defined _WIN64
-        printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"); fflush(stdout);
-#   else
-        printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"); fflush(stdout);
-#   endif
-#elif defined linux
-#   if defined _LP64
-        printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"); fflush(stdout);
-#   else
-        printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"); fflush(stdout);
-#   endif
-#elif defined __APPLE__
-#   if defined _LP64
-        printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"); fflush(stdout);
-#   else
-        printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"); fflush(stdout);
-#   endif
-#endif
-}
-
-static void printCudaInfo()
-{
-    const int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
-
-    printf("[----------]\n"); fflush(stdout);
-    printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount); fflush(stdout);
-    printf("[----------]\n"); fflush(stdout);
-
-    for (int i = 0; i < deviceCount; ++i)
-    {
-        cv::gpu::DeviceInfo info(i);
-
-        printf("[----------]\n"); fflush(stdout);
-        printf("[ DEVICE   ] \t# %d %s.\n", i, info.name().c_str()); fflush(stdout);
-        printf("[          ] \tCompute capability: %d.%d\n", info.majorVersion(), info.minorVersion()); fflush(stdout);
-        printf("[          ] \tMulti Processor Count:  %d\n", info.multiProcessorCount()); fflush(stdout);
-        printf("[          ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)); fflush(stdout);
-        printf("[          ] \tFree  memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory()  / 1024.0) / 1024.0)); fflush(stdout);
-        if (!info.isCompatible())
-            printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n");
-        printf("[----------]\n"); fflush(stdout);
-    }
-}
-
-int main(int argc, char* argv[])
-{
-    printOsInfo();
-    printCudaInfo();
-
-    perf::Regression::Init("nv_perf_test");
-    perf::TestBase::Init(argc, argv);
-    testing::InitGoogleTest(&argc, argv);
-
-    return RUN_ALL_TESTS();
-}
-
-#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
-#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
-
-//////////////////////////////////////////////////////////
-// HoughLinesP
-
-DEF_PARAM_TEST_1(Image, std::string);
-
-GPU_PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg")))
-{
-    declare.time(30.0);
-
-    std::string fileName = GetParam();
-
-    const float rho = 1.f;
-    const float theta = 1.f;
-    const int threshold = 40;
-    const int minLineLenght = 20;
-    const int maxLineGap = 5;
-
-    cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat d_lines;
-        cv::gpu::HoughLinesBuf d_buf;
-
-        cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
-        }
-    }
-    else
-    {
-        cv::Mat mask;
-        cv::Canny(image, mask, 50, 100);
-
-        std::vector<cv::Vec4i> lines;
-        cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);
-
-        TEST_CYCLE()
-        {
-            cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);
-        }
-    }
-
-    SANITY_CHECK(0);
-}
-
-//////////////////////////////////////////////////////////
-// GoodFeaturesToTrack
-
-DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth);
-
-GPU_PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
-                testing::Combine(
-                testing::Values(std::string("im1_1280x800.jpg")),
-                testing::Values(CV_8U, CV_16U)
-                ))
-{
-    declare.time(60);
-
-    const std::string fileName = std::tr1::get<0>(GetParam());
-    const int depth = std::tr1::get<1>(GetParam());
-
-    const int maxCorners = 5000;
-    const double qualityLevel = 0.05;
-    const int minDistance = 5;
-    const int blockSize = 3;
-    const bool useHarrisDetector = true;
-    const double k = 0.05;
-
-    cv::Mat src = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
-    if (src.empty())
-        FAIL() << "Unable to load source image [" << fileName << "]";
-
-    if (depth != CV_8U)
-        src.convertTo(src, depth);
-
-    cv::Mat mask(src.size(), CV_8UC1, cv::Scalar::all(1));
-    mask(cv::Rect(0, 0, 100, 100)).setTo(cv::Scalar::all(0));
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k);
-
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_mask(mask);
-        cv::gpu::GpuMat d_pts;
-
-        d_detector(d_src, d_pts, d_mask);
-
-        TEST_CYCLE()
-        {
-            d_detector(d_src, d_pts, d_mask);
-        }
-    }
-    else
-    {
-        if (depth != CV_8U)
-            FAIL() << "Unsupported depth";
-
-        cv::Mat pts;
-
-        cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k);
-
-        TEST_CYCLE()
-        {
-            cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k);
-        }
-    }
-
-    SANITY_CHECK(0);
-}
-
-//////////////////////////////////////////////////////////
-// OpticalFlowPyrLKSparse
-
-typedef std::pair<std::string, std::string> string_pair;
-
-DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool);
-
-GPU_PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
-                testing::Combine(
-                    testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
-                    testing::Values(CV_8U, CV_16U),
-                    testing::Bool()
-                    ))
-{
-    declare.time(60);
-
-    const string_pair fileNames = std::tr1::get<0>(GetParam());
-    const int depth = std::tr1::get<1>(GetParam());
-    const bool graySource = std::tr1::get<2>(GetParam());
-
-    // PyrLK params
-    const cv::Size winSize(15, 15);
-    const int maxLevel = 5;
-    const cv::TermCriteria criteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 0.01);
-
-    // GoodFeaturesToTrack params
-    const int maxCorners = 5000;
-    const double qualityLevel = 0.05;
-    const int minDistance = 5;
-    const int blockSize = 3;
-    const bool useHarrisDetector = true;
-    const double k = 0.05;
-
-    cv::Mat src1 = cv::imread(fileNames.first, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
-    if (src1.empty())
-        FAIL() << "Unable to load source image [" << fileNames.first << "]";
-
-    cv::Mat src2 = cv::imread(fileNames.second, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
-    if (src2.empty())
-        FAIL() << "Unable to load source image [" << fileNames.second << "]";
-
-    cv::Mat gray_src;
-    if (graySource)
-        gray_src = src1;
-    else
-        cv::cvtColor(src1, gray_src, cv::COLOR_BGR2GRAY);
-
-    cv::Mat pts;
-    cv::goodFeaturesToTrack(gray_src, pts, maxCorners, qualityLevel, minDistance, cv::noArray(), blockSize, useHarrisDetector, k);
-
-    if (depth != CV_8U)
-    {
-        src1.convertTo(src1, depth);
-        src2.convertTo(src2, depth);
-    }
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_pts(pts.reshape(2, 1));
-        cv::gpu::GpuMat d_nextPts;
-        cv::gpu::GpuMat d_status;
-
-        cv::gpu::PyrLKOpticalFlow d_pyrLK;
-        d_pyrLK.winSize = winSize;
-        d_pyrLK.maxLevel = maxLevel;
-        d_pyrLK.iters = criteria.maxCount;
-        d_pyrLK.useInitialFlow = false;
-
-        d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status);
-
-        TEST_CYCLE()
-        {
-            d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status);
-        }
-    }
-    else
-    {
-        if (depth != CV_8U)
-            FAIL() << "Unsupported depth";
-
-        cv::Mat nextPts;
-        cv::Mat status;
-
-        cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria);
-
-        TEST_CYCLE()
-        {
-            cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria);
-        }
-    }
-
-    SANITY_CHECK(0);
-}
-
-//////////////////////////////////////////////////////////
-// OpticalFlowFarneback
-
-DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth);
-
-GPU_PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
-                testing::Combine(
-                    testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
-                    testing::Values(CV_8U, CV_16U)
-                    ))
-{
-    declare.time(500);
-
-    const string_pair fileNames = std::tr1::get<0>(GetParam());
-    const int depth = std::tr1::get<1>(GetParam());
-
-    const double pyrScale = 0.5;
-    const int numLevels = 6;
-    const int winSize = 7;
-    const int numIters = 15;
-    const int polyN = 7;
-    const double polySigma = 1.5;
-    const int flags = cv::OPTFLOW_USE_INITIAL_FLOW;
-
-    cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
-    if (src1.empty())
-        FAIL() << "Unable to load source image [" << fileNames.first << "]";
-
-    cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
-    if (src2.empty())
-        FAIL() << "Unable to load source image [" << fileNames.second << "]";
-
-    if (depth != CV_8U)
-    {
-        src1.convertTo(src1, depth);
-        src2.convertTo(src2, depth);
-    }
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_u(src1.size(), CV_32FC1, cv::Scalar::all(0));
-        cv::gpu::GpuMat d_v(src1.size(), CV_32FC1, cv::Scalar::all(0));
-
-        cv::gpu::FarnebackOpticalFlow d_farneback;
-        d_farneback.pyrScale = pyrScale;
-        d_farneback.numLevels = numLevels;
-        d_farneback.winSize = winSize;
-        d_farneback.numIters = numIters;
-        d_farneback.polyN = polyN;
-        d_farneback.polySigma = polySigma;
-        d_farneback.flags = flags;
-
-        d_farneback(d_src1, d_src2, d_u, d_v);
-
-        TEST_CYCLE_N(10)
-        {
-            d_farneback(d_src1, d_src2, d_u, d_v);
-        }
-    }
-    else
-    {
-        if (depth != CV_8U)
-            FAIL() << "Unsupported depth";
-
-        cv::Mat flow(src1.size(), CV_32FC2, cv::Scalar::all(0));
-
-        cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
-
-        TEST_CYCLE_N(10)
-        {
-            cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
-        }
-    }
-
-    SANITY_CHECK(0);
-}
-
-//////////////////////////////////////////////////////////
-// OpticalFlowBM
-
-void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
-                       cv::Size bSize, cv::Size shiftSize, cv::Size maxRange, int usePrevious,
-                       cv::Mat& velx, cv::Mat& vely)
-{
-    cv::Size sz((curr.cols - bSize.width + shiftSize.width)/shiftSize.width, (curr.rows - bSize.height + shiftSize.height)/shiftSize.height);
-
-    velx.create(sz, CV_32FC1);
-    vely.create(sz, CV_32FC1);
-
-    CvMat cvprev = prev;
-    CvMat cvcurr = curr;
-
-    CvMat cvvelx = velx;
-    CvMat cvvely = vely;
-
-    cvCalcOpticalFlowBM(&cvprev, &cvcurr, bSize, shiftSize, maxRange, usePrevious, &cvvelx, &cvvely);
-}
-
-DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size);
-
-GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
-                testing::Combine(
-                    testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
-                    testing::Values(cv::Size(16, 16)),
-                    testing::Values(cv::Size(2, 2)),
-                    testing::Values(cv::Size(16, 16))
-                    ))
-{
-    declare.time(3000);
-
-    const string_pair fileNames = std::tr1::get<0>(GetParam());
-    const cv::Size block_size = std::tr1::get<1>(GetParam());
-    const cv::Size shift_size = std::tr1::get<2>(GetParam());
-    const cv::Size max_range = std::tr1::get<3>(GetParam());
-
-    cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
-    if (src1.empty())
-        FAIL() << "Unable to load source image [" << fileNames.first << "]";
-
-    cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
-    if (src2.empty())
-        FAIL() << "Unable to load source image [" << fileNames.second << "]";
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_velx, d_vely, buf;
-
-        cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
-
-        TEST_CYCLE_N(10)
-        {
-            cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
-        }
-    }
-    else
-    {
-        cv::Mat velx, vely;
-
-        calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
-
-        TEST_CYCLE_N(10)
-        {
-            calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
-        }
-    }
-
-    SANITY_CHECK(0);
-}
-
-GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
-                testing::Combine(
-                    testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
-                    testing::Values(cv::Size(16, 16)),
-                    testing::Values(cv::Size(1, 1)),
-                    testing::Values(cv::Size(16, 16))
-                    ))
-{
-    declare.time(3000);
-
-    const string_pair fileNames = std::tr1::get<0>(GetParam());
-    const cv::Size block_size = std::tr1::get<1>(GetParam());
-    const cv::Size shift_size = std::tr1::get<2>(GetParam());
-    const cv::Size max_range = std::tr1::get<3>(GetParam());
-
-    cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
-    if (src1.empty())
-        FAIL() << "Unable to load source image [" << fileNames.first << "]";
-
-    cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
-    if (src2.empty())
-        FAIL() << "Unable to load source image [" << fileNames.second << "]";
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_velx, d_vely;
-
-        cv::gpu::FastOpticalFlowBM fastBM;
-
-        fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width);
-
-        TEST_CYCLE_N(10)
-        {
-            fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width);
-        }
-    }
-    else
-    {
-        cv::Mat velx, vely;
-
-        calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
-
-        TEST_CYCLE_N(10)
-        {
-            calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
-        }
-    }
-
-    SANITY_CHECK(0);
-}

From a981dc93d0b317d4f4e3987a0b39810a6f61abb5 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 17:53:03 +0400
Subject: [PATCH 20/49] removed obsolete headers from gpu module

---
 modules/gpu/include/opencv2/gpu/devmem2d.hpp | 43 --------------------
 modules/gpu/include/opencv2/gpu/gpumat.hpp   | 43 --------------------
 2 files changed, 86 deletions(-)
 delete mode 100644 modules/gpu/include/opencv2/gpu/devmem2d.hpp
 delete mode 100644 modules/gpu/include/opencv2/gpu/gpumat.hpp

diff --git a/modules/gpu/include/opencv2/gpu/devmem2d.hpp b/modules/gpu/include/opencv2/gpu/devmem2d.hpp
deleted file mode 100644
index 18dfcd8ac..000000000
--- a/modules/gpu/include/opencv2/gpu/devmem2d.hpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "opencv2/core/cuda_devptrs.hpp"
diff --git a/modules/gpu/include/opencv2/gpu/gpumat.hpp b/modules/gpu/include/opencv2/gpu/gpumat.hpp
deleted file mode 100644
index 840398b57..000000000
--- a/modules/gpu/include/opencv2/gpu/gpumat.hpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "opencv2/core/gpumat.hpp"

From 623c7da74708d85ee7f2f08d8171d093fa63a98b Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 17:53:50 +0400
Subject: [PATCH 21/49] removed obsolete source files from gpu module

---
 modules/gpu/src/cuda/internal_shared.hpp |  58 --------
 modules/gpu/src/cuda/safe_call.hpp       |  50 -------
 modules/gpu/src/cuda/texture_binder.hpp  |  92 -------------
 modules/gpu/src/error.cpp                |  45 ------
 modules/gpu/src/speckle_filtering.cpp    | 168 -----------------------
 5 files changed, 413 deletions(-)
 delete mode 100644 modules/gpu/src/cuda/internal_shared.hpp
 delete mode 100644 modules/gpu/src/cuda/safe_call.hpp
 delete mode 100644 modules/gpu/src/cuda/texture_binder.hpp
 delete mode 100644 modules/gpu/src/error.cpp
 delete mode 100644 modules/gpu/src/speckle_filtering.cpp

diff --git a/modules/gpu/src/cuda/internal_shared.hpp b/modules/gpu/src/cuda/internal_shared.hpp
deleted file mode 100644
index ce2cfe465..000000000
--- a/modules/gpu/src/cuda/internal_shared.hpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_internal_shared_HPP__
-#define __OPENCV_internal_shared_HPP__
-
-#include <cuda_runtime.h>
-#include <npp.h>
-
-#include "opencv2/core/cuda_devptrs.hpp"
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/gpunvidia/private.hpp"
-
-
-#include "safe_call.hpp"
-
-
-
-#endif /* __OPENCV_internal_shared_HPP__ */
diff --git a/modules/gpu/src/cuda/safe_call.hpp b/modules/gpu/src/cuda/safe_call.hpp
deleted file mode 100644
index 35530bee3..000000000
--- a/modules/gpu/src/cuda/safe_call.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
-#define __OPENCV_CUDA_SAFE_CALL_HPP__
-
-#include <cufft.h>
-
-
-
-#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
diff --git a/modules/gpu/src/cuda/texture_binder.hpp b/modules/gpu/src/cuda/texture_binder.hpp
deleted file mode 100644
index e9677460f..000000000
--- a/modules/gpu/src/cuda/texture_binder.hpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef OPENCV_GPU_TEXTURE_BINDER_HPP_
-#define OPENCV_GPU_TEXTURE_BINDER_HPP_
-
-#include "opencv2/gpu/devmem2d.hpp"
-#include <safe_call.hpp>
-
-namespace cv
-{
-  namespace gpu
-  {
-    class TextureBinder
-    {
-    public:
-      template<class T, enum cudaTextureReadMode readMode>
-      TextureBinder(const PtrStepSz<T>& arr, const struct texture<T, 2, readMode>& tex) : texref(&tex)
-      {
-        cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-        cudaSafeCall( cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step) );
-      }
-
-      template<class T, enum cudaTextureReadMode readMode>
-      TextureBinder(const PtrSz<T>& arr, const struct texture<T, 1, readMode> &tex) : texref(&tex)
-      {
-        cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-        cudaSafeCall( cudaBindTexture(0, tex, arr.data, desc, arr.size * arr.elemSize()) );
-      }
-
-      template<class A, class T, enum cudaTextureReadMode readMode>
-      TextureBinder(const A& arr, const struct texture<T, 2, readMode>& tex, const cudaChannelFormatDesc& desc) : texref(&tex)
-      {
-        cudaSafeCall( cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step) );
-      }
-
-
-      ~TextureBinder()
-      {
-        cudaSafeCall( cudaUnbindTexture(texref) );
-      }
-    private:
-      const struct textureReference *texref;
-    };
-  }
-
-  namespace cuda
-  {
-      using cv::gpu::TextureBinder;
-  }
-}
-
-#endif /* OPENCV_GPU_TEXTURE_BINDER_HPP_*/
diff --git a/modules/gpu/src/error.cpp b/modules/gpu/src/error.cpp
deleted file mode 100644
index d0a621d79..000000000
--- a/modules/gpu/src/error.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::gpu;
diff --git a/modules/gpu/src/speckle_filtering.cpp b/modules/gpu/src/speckle_filtering.cpp
deleted file mode 100644
index 85b6c47c7..000000000
--- a/modules/gpu/src/speckle_filtering.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-using namespace cv;
-
-namespace cv {
-// TODO: conflicts with calib3d.hpp : filterSpeckles, should be removed ?
-
-//! Speckle filtering - filters small connected components on diparity image.
-//! It sets pixel (x,y) to newVal if it coresponds to small CC with size < maxSpeckleSize.
-//! Threshold for border between CC is diffThreshold;
-  CV_EXPORTS void filterSpeckles(Mat& img, uchar newVal, int maxSpeckleSize, uchar diffThreshold, Mat& buf);
-}
-
-typedef Point_<short> Point2s;
-
-void cv::filterSpeckles( Mat& img, uchar newVal, int maxSpeckleSize, uchar maxDiff, Mat& _buf)
-{
-    int MaxD = 1024;
-    int WinSz = 64;
-
-    int bufSize0 = (MaxD + 2)*sizeof(int) + (img.rows+WinSz+2)*MaxD*sizeof(int) +
-        (img.rows + WinSz + 2)*sizeof(int) +
-        (img.rows+WinSz+2)*MaxD*(WinSz+1)*sizeof(uchar) + 256;
-    int bufSize1 = (img.cols + 9 + 2) * sizeof(int) + 256;
-    int bufSz = std::max(bufSize0 * 1, bufSize1 * 2);
-
-    _buf.create(1, bufSz, CV_8U);
-
-    CV_Assert( img.type() == CV_8U );
-
-    int width = img.cols, height = img.rows, npixels = width*height;
-    size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));
-    if( !_buf.isContinuous() || !_buf.data || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )
-        _buf.create(1, (int)bufSize, CV_8U);
-
-    uchar* buf = _buf.data;
-    int i, j, dstep = (int)(img.step/sizeof(uchar));
-    int* labels = (int*)buf;
-    buf += npixels*sizeof(labels[0]);
-    Point2s* wbuf = (Point2s*)buf;
-    buf += npixels*sizeof(wbuf[0]);
-    uchar* rtype = (uchar*)buf;
-    int curlabel = 0;
-
-    // clear out label assignments
-    memset(labels, 0, npixels*sizeof(labels[0]));
-
-    for( i = 0; i < height; i++ )
-    {
-        uchar* ds = img.ptr<uchar>(i);
-        int* ls = labels + width*i;
-
-        for( j = 0; j < width; j++ )
-        {
-            if( ds[j] != newVal )	// not a bad disparity
-            {
-                if( ls[j] )		// has a label, check for bad label
-                {
-                    if( rtype[ls[j]] ) // small region, zero out disparity
-                        ds[j] = (uchar)newVal;
-                }
-                // no label, assign and propagate
-                else
-                {
-                    Point2s* ws = wbuf;	// initialize wavefront
-                    Point2s p((short)j, (short)i);	// current pixel
-                    curlabel++;	// next label
-                    int count = 0;	// current region size
-                    ls[j] = curlabel;
-
-                    // wavefront propagation
-                    while( ws >= wbuf ) // wavefront not empty
-                    {
-                        count++;
-                        // put neighbors onto wavefront
-                        uchar* dpp = &img.at<uchar>(p.y, p.x);
-                        uchar dp = *dpp;
-                        int* lpp = labels + width*p.y + p.x;
-
-                        if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )
-                        {
-                            lpp[+1] = curlabel;
-                            *ws++ = Point2s(p.x+1, p.y);
-                        }
-
-                        if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )
-                        {
-                            lpp[-1] = curlabel;
-                            *ws++ = Point2s(p.x-1, p.y);
-                        }
-
-                        if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )
-                        {
-                            lpp[+width] = curlabel;
-                            *ws++ = Point2s(p.x, p.y+1);
-                        }
-
-                        if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )
-                        {
-                            lpp[-width] = curlabel;
-                            *ws++ = Point2s(p.x, p.y-1);
-                        }
-
-                        // pop most recent and propagate
-                        // NB: could try least recent, maybe better convergence
-                        p = *--ws;
-                    }
-
-                    // assign label type
-                    if( count <= maxSpeckleSize )	// speckle region
-                    {
-                        //printf("count = %d\n", count);
-                        rtype[ls[j]] = 1;	// small region label
-                        ds[j] = (uchar)newVal;
-                    }
-                    else
-                    {
-                        //printf("count = %d\n", count);
-                        rtype[ls[j]] = 0;	// large region label
-                    }
-                }
-            }
-        }
-    }
-}
-

From 77aafc2984af7ed2e2ccc5968548dffe5a1740d7 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 17:54:21 +0400
Subject: [PATCH 22/49] removed unused includes

---
 modules/gpu/include/opencv2/gpu.hpp | 103 +---------------------------
 modules/gpu/perf/perf_precomp.hpp   |  11 ---
 modules/gpu/perf4au/main.cpp        |   9 +--
 modules/gpu/src/precomp.hpp         |  35 ----------
 modules/gpu/test/test_precomp.hpp   |  22 +-----
 5 files changed, 5 insertions(+), 175 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index d6135865c..e2f747806 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -43,12 +43,6 @@
 #ifndef __OPENCV_GPU_HPP__
 #define __OPENCV_GPU_HPP__
 
-#ifndef SKIP_INCLUDES
-#include <vector>
-#include <memory>
-#include <iosfwd>
-#endif
-
 #include "opencv2/core/gpumat.hpp"
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpufilters.hpp"
@@ -58,100 +52,7 @@
 #include "opencv2/gpucalib3d.hpp"
 #include "opencv2/gpuobjdetect.hpp"
 
-#include "opencv2/imgproc.hpp"
-#include "opencv2/objdetect.hpp"
-#include "opencv2/features2d.hpp"
-
 namespace cv { namespace gpu {
-////////////////////////////// Image processing //////////////////////////////
-
-
-
-
-///////////////////////////// Calibration 3D //////////////////////////////////
-
-//////////////////////////////// Image Labeling ////////////////////////////////
-
-
-
-////////////////////////////////// Histograms //////////////////////////////////
-
-
-
-//////////////////////////////// StereoBM_GPU ////////////////////////////////
-
-
-
-////////////////////////// StereoBeliefPropagation ///////////////////////////
-
-
-/////////////////////////// StereoConstantSpaceBP ///////////////////////////
-
-
-
-/////////////////////////// DisparityBilateralFilter ///////////////////////////
-
-
-
-
-
-
-////////////////////////////////// BruteForceMatcher //////////////////////////////////
-
-
-
-template <class Distance>
-class CV_EXPORTS BruteForceMatcher_GPU;
-
-template <typename T>
-class CV_EXPORTS BruteForceMatcher_GPU< L1<T> > : public BFMatcher_GPU
-{
-public:
-    explicit BruteForceMatcher_GPU() : BFMatcher_GPU(NORM_L1) {}
-    explicit BruteForceMatcher_GPU(L1<T> /*d*/) : BFMatcher_GPU(NORM_L1) {}
-};
-template <typename T>
-class CV_EXPORTS BruteForceMatcher_GPU< L2<T> > : public BFMatcher_GPU
-{
-public:
-    explicit BruteForceMatcher_GPU() : BFMatcher_GPU(NORM_L2) {}
-    explicit BruteForceMatcher_GPU(L2<T> /*d*/) : BFMatcher_GPU(NORM_L2) {}
-};
-template <> class CV_EXPORTS BruteForceMatcher_GPU< Hamming > : public BFMatcher_GPU
-{
-public:
-    explicit BruteForceMatcher_GPU() : BFMatcher_GPU(NORM_HAMMING) {}
-    explicit BruteForceMatcher_GPU(Hamming /*d*/) : BFMatcher_GPU(NORM_HAMMING) {}
-};
-
-////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
-
-
-////////////////////////////////// FAST //////////////////////////////////////////
-
-
-
-////////////////////////////////// ORB //////////////////////////////////////////
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
 //! removes points (CV_32FC2, single row matrix) with zero mask value
 CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
@@ -160,8 +61,6 @@ CV_EXPORTS void calcWobbleSuppressionMaps(
         int left, int idx, int right, Size size, const Mat &ml, const Mat &mr,
         GpuMat &mapx, GpuMat &mapy);
 
-} // namespace gpu
-
-} // namespace cv
+}} // namespace cv { namespace gpu {
 
 #endif /* __OPENCV_GPU_HPP__ */
diff --git a/modules/gpu/perf/perf_precomp.hpp b/modules/gpu/perf/perf_precomp.hpp
index f365a5aea..9c75581d9 100644
--- a/modules/gpu/perf/perf_precomp.hpp
+++ b/modules/gpu/perf/perf_precomp.hpp
@@ -51,21 +51,10 @@
 #ifndef __OPENCV_PERF_PRECOMP_HPP__
 #define __OPENCV_PERF_PRECOMP_HPP__
 
-#include <cstdio>
-#include <iostream>
-
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_perf.hpp"
 
-#include "opencv2/core.hpp"
-#include "opencv2/highgui.hpp"
 #include "opencv2/gpu.hpp"
-#include "opencv2/calib3d.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/video.hpp"
-#include "opencv2/photo.hpp"
-
-#include "opencv2/core/gpu_private.hpp"
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
diff --git a/modules/gpu/perf4au/main.cpp b/modules/gpu/perf4au/main.cpp
index 47a6c4e25..30e63d541 100644
--- a/modules/gpu/perf4au/main.cpp
+++ b/modules/gpu/perf4au/main.cpp
@@ -40,18 +40,13 @@
 //
 //M*/
 
-#include <cstdio>
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
 
-#ifdef HAVE_CVCONFIG_H
-#include "cvconfig.h"
-#endif
-#include "opencv2/core.hpp"
 #include "opencv2/gpu.hpp"
 #include "opencv2/highgui.hpp"
 #include "opencv2/video.hpp"
 #include "opencv2/legacy.hpp"
-#include "opencv2/ts.hpp"
-#include "opencv2/ts/gpu_perf.hpp"
 
 int main(int argc, char* argv[])
 {
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index 0127bd28e..1b5207b38 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -43,43 +43,8 @@
 #ifndef __OPENCV_PRECOMP_H__
 #define __OPENCV_PRECOMP_H__
 
-#if defined _MSC_VER && _MSC_VER >= 1200
-    #pragma warning( disable: 4251 4710 4711 4514 4996 )
-#endif
-
-#include <cstring>
-#include <iostream>
-#include <limits>
-#include <vector>
-#include <algorithm>
-#include <sstream>
-#include <exception>
-#include <iterator>
-#include <functional>
-#include <utility>
-#include <deque>
-#include <stdexcept>
-#include <memory>
-
-#include "opencv2/core.hpp"
-#include "opencv2/core/utility.hpp"
 #include "opencv2/gpu.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/calib3d.hpp"
-#include "opencv2/video.hpp"
 
-#include "opencv2/core/private.hpp"
 #include "opencv2/core/gpu_private.hpp"
 
-#ifdef HAVE_CUDA
-    #ifdef HAVE_CUFFT
-        #include <cufft.h>
-    #endif
-
-    #include "internal_shared.hpp"
-    #include "opencv2/core/stream_accessor.hpp"
-
-    #include "opencv2/gpunvidia.hpp"
-#endif /* defined(HAVE_CUDA) */
-
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp
index f98f364b9..1e4248101 100644
--- a/modules/gpu/test/test_precomp.hpp
+++ b/modules/gpu/test/test_precomp.hpp
@@ -51,29 +51,11 @@
 #ifndef __OPENCV_TEST_PRECOMP_HPP__
 #define __OPENCV_TEST_PRECOMP_HPP__
 
-#include <cmath>
-#include <ctime>
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <functional>
-#include <sstream>
-#include <string>
-#include <limits>
-#include <algorithm>
-#include <iterator>
-#include <stdexcept>
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
 
 #include "opencv2/core.hpp"
 #include "opencv2/core/opengl.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/calib3d.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/video.hpp"
-#include "opencv2/ts.hpp"
-#include "opencv2/ts/gpu_test.hpp"
 #include "opencv2/gpu.hpp"
 
-#include "opencv2/core/gpu_private.hpp"
-
 #endif

From 7e91e1871ddba11357c2c3f690e75c8a91de7f40 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 17:54:39 +0400
Subject: [PATCH 23/49] simplify CMakeLists.txt for gpu module

---
 modules/gpu/CMakeLists.txt         | 59 ++----------------------------
 modules/gpu/perf4au/CMakeLists.txt |  1 -
 2 files changed, 3 insertions(+), 57 deletions(-)

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 55faa397b..de132cf9f 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -4,63 +4,10 @@ endif()
 
 set(the_description "GPU-accelerated Computer Vision")
 
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy
-                   opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d opencv_gpuobjdetect
-                   OPTIONAL opencv_gpunvidia)
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
-ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
-
-file(GLOB lib_hdrs               "include/opencv2/*.hpp"                       "include/opencv2/${name}/*.hpp"               "include/opencv2/${name}/*.h")
-file(GLOB lib_int_hdrs           "src/*.hpp"      "src/*.h")
-file(GLOB lib_cuda_hdrs          "src/cuda/*.hpp" "src/cuda/*.h")
-file(GLOB lib_srcs               "src/*.cpp")
-file(GLOB lib_cuda               "src/cuda/*.cu*")
-
-source_group("Include"        FILES ${lib_hdrs})
-source_group("Src\\Host"      FILES ${lib_srcs} ${lib_int_hdrs})
-source_group("Src\\Cuda"      FILES ${lib_cuda} ${lib_cuda_hdrs})
-
-if(HAVE_CUDA)
-  ocv_include_directories(${CUDA_INCLUDE_DIRS})
-  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter /wd4211 /wd4201 /wd4100 /wd4505 /wd4408)
-
-  if(MSVC)
-    if(NOT ENABLE_NOISY_WARNINGS)
-      foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
-        string(REPLACE "/W4" "/W3" ${var} "${${var}}")
-      endforeach()
-
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler /wd4251)
-    endif()
-  endif()
-
-  ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda})
-
-  set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
-else()
-  set(lib_cuda "")
-  set(cuda_objs "")
-  set(cuda_link_libs "")
-endif()
-
-ocv_set_module_sources(
-  HEADERS ${lib_hdrs}
-  SOURCES ${lib_int_hdrs} ${lib_cuda_hdrs} ${lib_srcs} ${lib_cuda} ${cuda_objs}
-  )
-
-ocv_create_module(${cuda_link_libs})
-
-ocv_add_precompiled_headers(${the_module})
-
-################################################################################################################
-################################      GPU Module Tests     #####################################################
-################################################################################################################
-file(GLOB test_srcs "test/*.cpp")
-file(GLOB test_hdrs "test/*.hpp" "test/*.h")
-
-ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
-                       FILES "Src" ${test_srcs})
-ocv_add_perf_tests()
+ocv_define_module(gpu opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc
+                      opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d opencv_gpuobjdetect)
 
 if(HAVE_CUDA)
   add_subdirectory(perf4au)
diff --git a/modules/gpu/perf4au/CMakeLists.txt b/modules/gpu/perf4au/CMakeLists.txt
index 745220382..376e7b270 100644
--- a/modules/gpu/perf4au/CMakeLists.txt
+++ b/modules/gpu/perf4au/CMakeLists.txt
@@ -25,4 +25,3 @@ if(WIN32)
         set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG")
     endif()
 endif()
-

From 508fb6aa5b15aef698c1fe37d655a1f11ecf17fd Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:14:43 +0400
Subject: [PATCH 24/49] renamed gpunvidia -> gpulegacy

---
 modules/gpuarithm/CMakeLists.txt                 |  2 +-
 modules/gpuarithm/src/arithm.cpp                 |  4 ++--
 modules/gpuarithm/src/precomp.hpp                |  6 +++---
 modules/gpulegacy/CMakeLists.txt                 |  9 +++++++++
 .../include/opencv2/gpulegacy.hpp}               | 16 ++++++++--------
 .../include/opencv2/gpulegacy}/NCV.hpp           |  0
 .../opencv2/gpulegacy}/NCVBroxOpticalFlow.hpp    |  2 +-
 .../gpulegacy}/NCVHaarObjectDetection.hpp        |  2 +-
 .../include/opencv2/gpulegacy}/NCVPyramid.hpp    | 12 +++++++++++-
 .../include/opencv2/gpulegacy}/NPP_staging.hpp   |  2 +-
 .../include/opencv2/gpulegacy}/private.hpp       | 10 +++++-----
 modules/{gpunvidia => gpulegacy}/src/NCV.cpp     |  0
 modules/{gpunvidia => gpulegacy}/src/cuda/NCV.cu |  2 +-
 .../{gpunvidia => gpulegacy}/src/cuda/NCVAlg.hpp |  2 +-
 .../src/cuda/NCVBroxOpticalFlow.cu               |  4 ++--
 .../src/cuda/NCVColorConversion.hpp              |  0
 .../src/cuda/NCVHaarObjectDetection.cu           |  6 +++---
 .../src/cuda/NCVPixelOperations.hpp              |  2 +-
 .../src/cuda/NCVPyramid.cu                       |  4 ++--
 .../src/cuda/NCVRuntimeTemplates.hpp             |  0
 .../src/cuda/NPP_staging.cu                      |  2 +-
 modules/{gpunvidia => gpulegacy}/src/precomp.cpp |  0
 modules/{gpunvidia => gpulegacy}/src/precomp.hpp |  4 ++--
 .../test/NCVAutoTestLister.hpp                   |  0
 .../{gpunvidia => gpulegacy}/test/NCVTest.hpp    |  2 +-
 .../test/NCVTestSourceProvider.hpp               |  2 +-
 .../test/TestCompact.cpp                         |  0
 .../{gpunvidia => gpulegacy}/test/TestCompact.h  |  0
 .../test/TestDrawRects.cpp                       |  0
 .../test/TestDrawRects.h                         |  0
 .../test/TestHaarCascadeApplication.cpp          |  0
 .../test/TestHaarCascadeApplication.h            |  0
 .../test/TestHaarCascadeLoader.cpp               |  0
 .../test/TestHaarCascadeLoader.h                 |  0
 .../test/TestHypothesesFilter.cpp                |  0
 .../test/TestHypothesesFilter.h                  |  0
 .../test/TestHypothesesGrow.cpp                  |  0
 .../test/TestHypothesesGrow.h                    |  0
 .../test/TestIntegralImage.cpp                   |  0
 .../test/TestIntegralImage.h                     |  0
 .../test/TestIntegralImageSquared.cpp            |  0
 .../test/TestIntegralImageSquared.h              |  0
 .../test/TestRectStdDev.cpp                      |  0
 .../test/TestRectStdDev.h                        |  0
 .../{gpunvidia => gpulegacy}/test/TestResize.cpp |  0
 .../{gpunvidia => gpulegacy}/test/TestResize.h   |  0
 .../test/TestTranspose.cpp                       |  0
 .../test/TestTranspose.h                         |  0
 .../test/main_nvidia.cpp                         |  0
 .../test/main_test_nvidia.h                      |  0
 .../{gpunvidia => gpulegacy}/test/test_main.cpp  |  0
 .../test/test_nvidia.cpp                         |  5 ++---
 .../test/test_precomp.cpp                        |  0
 .../test/test_precomp.hpp                        |  2 +-
 modules/gpunvidia/CMakeLists.txt                 |  9 ---------
 modules/gpuobjdetect/CMakeLists.txt              |  2 +-
 modules/gpuobjdetect/src/precomp.hpp             |  4 ++--
 modules/gpuvideo/CMakeLists.txt                  |  2 +-
 modules/gpuvideo/src/precomp.hpp                 |  4 ++--
 samples/gpu/CMakeLists.txt                       |  2 +-
 samples/gpu/cascadeclassifier_nvidia_api.cpp     |  2 +-
 samples/gpu/opticalflow_nvidia_api.cpp           |  2 +-
 62 files changed, 69 insertions(+), 60 deletions(-)
 create mode 100644 modules/gpulegacy/CMakeLists.txt
 rename modules/{gpunvidia/include/opencv2/gpunvidia.hpp => gpulegacy/include/opencv2/gpulegacy.hpp} (86%)
 rename modules/{gpunvidia/include/opencv2/gpunvidia => gpulegacy/include/opencv2/gpulegacy}/NCV.hpp (100%)
 rename modules/{gpunvidia/include/opencv2/gpunvidia => gpulegacy/include/opencv2/gpulegacy}/NCVBroxOpticalFlow.hpp (99%)
 rename modules/{gpunvidia/include/opencv2/gpunvidia => gpulegacy/include/opencv2/gpulegacy}/NCVHaarObjectDetection.hpp (99%)
 rename modules/{gpunvidia/include/opencv2/gpunvidia => gpulegacy/include/opencv2/gpulegacy}/NCVPyramid.hpp (89%)
 rename modules/{gpunvidia/include/opencv2/gpunvidia => gpulegacy/include/opencv2/gpulegacy}/NPP_staging.hpp (99%)
 rename modules/{gpunvidia/include/opencv2/gpunvidia => gpulegacy/include/opencv2/gpulegacy}/private.hpp (94%)
 rename modules/{gpunvidia => gpulegacy}/src/NCV.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NCV.cu (99%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NCVAlg.hpp (99%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NCVBroxOpticalFlow.cu (99%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NCVColorConversion.hpp (100%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NCVHaarObjectDetection.cu (99%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NCVPixelOperations.hpp (99%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NCVPyramid.cu (99%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NCVRuntimeTemplates.hpp (100%)
 rename modules/{gpunvidia => gpulegacy}/src/cuda/NPP_staging.cu (99%)
 rename modules/{gpunvidia => gpulegacy}/src/precomp.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/src/precomp.hpp (97%)
 rename modules/{gpunvidia => gpulegacy}/test/NCVAutoTestLister.hpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/NCVTest.hpp (99%)
 rename modules/{gpunvidia => gpulegacy}/test/NCVTestSourceProvider.hpp (99%)
 rename modules/{gpunvidia => gpulegacy}/test/TestCompact.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestCompact.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestDrawRects.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestDrawRects.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestHaarCascadeApplication.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestHaarCascadeApplication.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestHaarCascadeLoader.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestHaarCascadeLoader.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestHypothesesFilter.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestHypothesesFilter.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestHypothesesGrow.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestHypothesesGrow.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestIntegralImage.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestIntegralImage.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestIntegralImageSquared.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestIntegralImageSquared.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestRectStdDev.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestRectStdDev.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestResize.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestResize.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestTranspose.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/TestTranspose.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/main_nvidia.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/main_test_nvidia.h (100%)
 rename modules/{gpunvidia => gpulegacy}/test/test_main.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/test_nvidia.cpp (96%)
 rename modules/{gpunvidia => gpulegacy}/test/test_precomp.cpp (100%)
 rename modules/{gpunvidia => gpulegacy}/test/test_precomp.hpp (99%)
 delete mode 100644 modules/gpunvidia/CMakeLists.txt

diff --git a/modules/gpuarithm/CMakeLists.txt b/modules/gpuarithm/CMakeLists.txt
index 4be25dd8d..c5ce72304 100644
--- a/modules/gpuarithm/CMakeLists.txt
+++ b/modules/gpuarithm/CMakeLists.txt
@@ -6,7 +6,7 @@ set(the_description "GPU-accelerated Operations on Matrices")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_gpunvidia opencv_imgproc)
+ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_gpulegacy opencv_imgproc)
 
 if(HAVE_CUBLAS)
   CUDA_ADD_CUBLAS_TO_TARGET(${the_module})
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index 59fd2ee1d..f5af24ee2 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -824,7 +824,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
     }
     else
     {
-#ifndef HAVE_OPENCV_GPUNVIDIA
+#ifndef HAVE_OPENCV_GPULEGACY
     throw_no_cuda();
 #else
         sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
@@ -856,7 +856,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
 
 void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
 {
-#ifndef HAVE_OPENCV_GPUNVIDIA
+#ifndef HAVE_OPENCV_GPULEGACY
     (void) src;
     (void) sqsum;
     (void) s;
diff --git a/modules/gpuarithm/src/precomp.hpp b/modules/gpuarithm/src/precomp.hpp
index f8e38e8cd..05493aa96 100644
--- a/modules/gpuarithm/src/precomp.hpp
+++ b/modules/gpuarithm/src/precomp.hpp
@@ -53,9 +53,9 @@
 
 #include "opencv2/opencv_modules.hpp"
 
-#ifdef HAVE_OPENCV_GPUNVIDIA
-#  include "opencv2/gpunvidia.hpp"
-#  include "opencv2/gpunvidia/private.hpp"
+#ifdef HAVE_OPENCV_GPULEGACY
+#  include "opencv2/gpulegacy.hpp"
+#  include "opencv2/gpulegacy/private.hpp"
 #endif
 
 #ifdef HAVE_CUBLAS
diff --git a/modules/gpulegacy/CMakeLists.txt b/modules/gpulegacy/CMakeLists.txt
new file mode 100644
index 000000000..6dd61bd5b
--- /dev/null
+++ b/modules/gpulegacy/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(NOT HAVE_CUDA)
+  ocv_module_disable(gpulegacy)
+endif()
+
+set(the_description "GPU-accelerated Computer Vision (legacy)")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wuninitialized)
+
+ocv_define_module(gpulegacy opencv_core OPTIONAL opencv_objdetect)
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia.hpp b/modules/gpulegacy/include/opencv2/gpulegacy.hpp
similarity index 86%
rename from modules/gpunvidia/include/opencv2/gpunvidia.hpp
rename to modules/gpulegacy/include/opencv2/gpulegacy.hpp
index 47555f890..fb88481a8 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy.hpp
@@ -40,13 +40,13 @@
 //
 //M*/
 
-#ifndef __OPENCV_GPUNVIDIA_HPP__
-#define __OPENCV_GPUNVIDIA_HPP__
+#ifndef __OPENCV_GPULEGACY_HPP__
+#define __OPENCV_GPULEGACY_HPP__
 
-#include "opencv2/gpunvidia/NCV.hpp"
-#include "opencv2/gpunvidia/NPP_staging.hpp"
-#include "opencv2/gpunvidia/NCVPyramid.hpp"
-#include "opencv2/gpunvidia/NCVHaarObjectDetection.hpp"
-#include "opencv2/gpunvidia/NCVBroxOpticalFlow.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
+#include "opencv2/gpulegacy/NPP_staging.hpp"
+#include "opencv2/gpulegacy/NCVPyramid.hpp"
+#include "opencv2/gpulegacy/NCVHaarObjectDetection.hpp"
+#include "opencv2/gpulegacy/NCVBroxOpticalFlow.hpp"
 
-#endif /* __OPENCV_GPUNVIDIA_HPP__ */
+#endif /* __OPENCV_GPULEGACY_HPP__ */
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCV.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
similarity index 100%
rename from modules/gpunvidia/include/opencv2/gpunvidia/NCV.hpp
rename to modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NCVBroxOpticalFlow.hpp
similarity index 99%
rename from modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
rename to modules/gpulegacy/include/opencv2/gpulegacy/NCVBroxOpticalFlow.hpp
index 0634fff8f..689347ac4 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NCVBroxOpticalFlow.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVBroxOpticalFlow.hpp
@@ -60,7 +60,7 @@
 #ifndef _ncv_optical_flow_h_
 #define _ncv_optical_flow_h_
 
-#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
 
 /// \brief Model and solver parameters
 struct NCVBroxOpticalFlowDescriptor
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NCVHaarObjectDetection.hpp
similarity index 99%
rename from modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp
rename to modules/gpulegacy/include/opencv2/gpulegacy/NCVHaarObjectDetection.hpp
index 323c629e6..78ecf3b7e 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NCVHaarObjectDetection.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVHaarObjectDetection.hpp
@@ -59,7 +59,7 @@
 #ifndef _ncvhaarobjectdetection_hpp_
 #define _ncvhaarobjectdetection_hpp_
 
-#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
 
 
 //==============================================================================
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
similarity index 89%
rename from modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp
rename to modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
index 91972c575..8fda836fe 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NCVPyramid.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
@@ -45,7 +45,17 @@
 
 #include <memory>
 #include <vector>
-#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
+#include "opencv2/core/cuda/common.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace pyramid
+    {
+        template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+    }
+}}}
 
 #if 0 //def _WIN32
 
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NPP_staging.hpp
similarity index 99%
rename from modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp
rename to modules/gpulegacy/include/opencv2/gpulegacy/NPP_staging.hpp
index 2df393a10..ce64ae810 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/NPP_staging.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NPP_staging.hpp
@@ -43,7 +43,7 @@
 #ifndef _npp_staging_hpp_
 #define _npp_staging_hpp_
 
-#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
 
 
 /**
diff --git a/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
similarity index 94%
rename from modules/gpunvidia/include/opencv2/gpunvidia/private.hpp
rename to modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
index f23e53604..b8c9dd50a 100644
--- a/modules/gpunvidia/include/opencv2/gpunvidia/private.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_GPUNVIDIA_PRIVATE_HPP__
-#define __OPENCV_CORE_GPUNVIDIA_PRIVATE_HPP__
+#ifndef __OPENCV_CORE_GPULEGACY_PRIVATE_HPP__
+#define __OPENCV_CORE_GPULEGACY_PRIVATE_HPP__
 
 #ifndef __OPENCV_BUILD
 #  error this is a private header which should not be used from outside of the OpenCV library
@@ -51,10 +51,10 @@
 #include "opencv2/core/gpu_private.hpp"
 
 #ifndef HAVE_CUDA
-#  error gpunvidia module requires CUDA
+#  error gpulegacy module requires CUDA
 #endif
 
-#include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpulegacy.hpp"
 
 namespace cv { namespace gpu
 {
@@ -93,4 +93,4 @@ namespace cv { namespace gpu
     #define ncvSafeCall(expr)  cv::gpu::checkNcvError(expr, __FILE__, __LINE__, "")
 #endif
 
-#endif // __OPENCV_CORE_GPUNVIDIA_PRIVATE_HPP__
+#endif // __OPENCV_CORE_GPULEGACY_PRIVATE_HPP__
diff --git a/modules/gpunvidia/src/NCV.cpp b/modules/gpulegacy/src/NCV.cpp
similarity index 100%
rename from modules/gpunvidia/src/NCV.cpp
rename to modules/gpulegacy/src/NCV.cpp
diff --git a/modules/gpunvidia/src/cuda/NCV.cu b/modules/gpulegacy/src/cuda/NCV.cu
similarity index 99%
rename from modules/gpunvidia/src/cuda/NCV.cu
rename to modules/gpulegacy/src/cuda/NCV.cu
index 0e5b50e9a..8774af5c9 100644
--- a/modules/gpunvidia/src/cuda/NCV.cu
+++ b/modules/gpulegacy/src/cuda/NCV.cu
@@ -43,7 +43,7 @@
 #include <iostream>
 #include <vector>
 
-#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
 
 //===================================================================
 //
diff --git a/modules/gpunvidia/src/cuda/NCVAlg.hpp b/modules/gpulegacy/src/cuda/NCVAlg.hpp
similarity index 99%
rename from modules/gpunvidia/src/cuda/NCVAlg.hpp
rename to modules/gpulegacy/src/cuda/NCVAlg.hpp
index ad14d749f..dc086d944 100644
--- a/modules/gpunvidia/src/cuda/NCVAlg.hpp
+++ b/modules/gpulegacy/src/cuda/NCVAlg.hpp
@@ -43,7 +43,7 @@
 #ifndef _ncv_alg_hpp_
 #define _ncv_alg_hpp_
 
-#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
 
 
 template <class T>
diff --git a/modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu b/modules/gpulegacy/src/cuda/NCVBroxOpticalFlow.cu
similarity index 99%
rename from modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
rename to modules/gpulegacy/src/cuda/NCVBroxOpticalFlow.cu
index 4faba6331..d9848ad4b 100644
--- a/modules/gpunvidia/src/cuda/NCVBroxOpticalFlow.cu
+++ b/modules/gpulegacy/src/cuda/NCVBroxOpticalFlow.cu
@@ -63,8 +63,8 @@
 
 #include "opencv2/core/cuda/utility.hpp"
 
-#include "opencv2/gpunvidia/NPP_staging.hpp"
-#include "opencv2/gpunvidia/NCVBroxOpticalFlow.hpp"
+#include "opencv2/gpulegacy/NPP_staging.hpp"
+#include "opencv2/gpulegacy/NCVBroxOpticalFlow.hpp"
 
 
 typedef NCVVectorAlloc<Ncv32f> FloatVector;
diff --git a/modules/gpunvidia/src/cuda/NCVColorConversion.hpp b/modules/gpulegacy/src/cuda/NCVColorConversion.hpp
similarity index 100%
rename from modules/gpunvidia/src/cuda/NCVColorConversion.hpp
rename to modules/gpulegacy/src/cuda/NCVColorConversion.hpp
diff --git a/modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
similarity index 99%
rename from modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu
rename to modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
index 5296f24a3..da34ba731 100644
--- a/modules/gpunvidia/src/cuda/NCVHaarObjectDetection.cu
+++ b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
@@ -68,9 +68,9 @@
 #  include "opencv2/objdetect.hpp"
 #endif
 
-#include "opencv2/gpunvidia/NCV.hpp"
-#include "opencv2/gpunvidia/NPP_staging.hpp"
-#include "opencv2/gpunvidia/NCVHaarObjectDetection.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
+#include "opencv2/gpulegacy/NPP_staging.hpp"
+#include "opencv2/gpulegacy/NCVHaarObjectDetection.hpp"
 
 #include "NCVRuntimeTemplates.hpp"
 #include "NCVAlg.hpp"
diff --git a/modules/gpunvidia/src/cuda/NCVPixelOperations.hpp b/modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
similarity index 99%
rename from modules/gpunvidia/src/cuda/NCVPixelOperations.hpp
rename to modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
index 2acdfb682..6409fab94 100644
--- a/modules/gpunvidia/src/cuda/NCVPixelOperations.hpp
+++ b/modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
@@ -45,7 +45,7 @@
 
 #include <limits.h>
 #include <float.h>
-#include "opencv2/gpunvidia/NCV.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
 
 template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
 template<> static inline __host__ __device__ Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
diff --git a/modules/gpunvidia/src/cuda/NCVPyramid.cu b/modules/gpulegacy/src/cuda/NCVPyramid.cu
similarity index 99%
rename from modules/gpunvidia/src/cuda/NCVPyramid.cu
rename to modules/gpulegacy/src/cuda/NCVPyramid.cu
index 6b76c644b..acc4441b1 100644
--- a/modules/gpunvidia/src/cuda/NCVPyramid.cu
+++ b/modules/gpulegacy/src/cuda/NCVPyramid.cu
@@ -45,8 +45,8 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-#include "opencv2/gpunvidia/NCV.hpp"
-#include "opencv2/gpunvidia/NCVPyramid.hpp"
+#include "opencv2/gpulegacy/NCV.hpp"
+#include "opencv2/gpulegacy/NCVPyramid.hpp"
 
 #include "NCVAlg.hpp"
 #include "NCVPixelOperations.hpp"
diff --git a/modules/gpunvidia/src/cuda/NCVRuntimeTemplates.hpp b/modules/gpulegacy/src/cuda/NCVRuntimeTemplates.hpp
similarity index 100%
rename from modules/gpunvidia/src/cuda/NCVRuntimeTemplates.hpp
rename to modules/gpulegacy/src/cuda/NCVRuntimeTemplates.hpp
diff --git a/modules/gpunvidia/src/cuda/NPP_staging.cu b/modules/gpulegacy/src/cuda/NPP_staging.cu
similarity index 99%
rename from modules/gpunvidia/src/cuda/NPP_staging.cu
rename to modules/gpulegacy/src/cuda/NPP_staging.cu
index 31f7adc1d..9234e1795 100644
--- a/modules/gpunvidia/src/cuda/NPP_staging.cu
+++ b/modules/gpulegacy/src/cuda/NPP_staging.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/warp.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
 
-#include "opencv2/gpunvidia/NPP_staging.hpp"
+#include "opencv2/gpulegacy/NPP_staging.hpp"
 
 
 texture<Ncv8u,  1, cudaReadModeElementType> tex8u;
diff --git a/modules/gpunvidia/src/precomp.cpp b/modules/gpulegacy/src/precomp.cpp
similarity index 100%
rename from modules/gpunvidia/src/precomp.cpp
rename to modules/gpulegacy/src/precomp.cpp
diff --git a/modules/gpunvidia/src/precomp.hpp b/modules/gpulegacy/src/precomp.hpp
similarity index 97%
rename from modules/gpunvidia/src/precomp.hpp
rename to modules/gpulegacy/src/precomp.hpp
index 106d0a321..c871296b2 100644
--- a/modules/gpunvidia/src/precomp.hpp
+++ b/modules/gpulegacy/src/precomp.hpp
@@ -47,7 +47,7 @@
 #include <iostream>
 #include <algorithm>
 
-#include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpulegacy.hpp"
 #include "opencv2/core/utility.hpp"
 
 #include "opencv2/opencv_modules.hpp"
@@ -57,6 +57,6 @@
 #endif
 
 #include "opencv2/core/gpu_private.hpp"
-#include "opencv2/gpunvidia/private.hpp"
+#include "opencv2/gpulegacy/private.hpp"
 
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpunvidia/test/NCVAutoTestLister.hpp b/modules/gpulegacy/test/NCVAutoTestLister.hpp
similarity index 100%
rename from modules/gpunvidia/test/NCVAutoTestLister.hpp
rename to modules/gpulegacy/test/NCVAutoTestLister.hpp
diff --git a/modules/gpunvidia/test/NCVTest.hpp b/modules/gpulegacy/test/NCVTest.hpp
similarity index 99%
rename from modules/gpunvidia/test/NCVTest.hpp
rename to modules/gpulegacy/test/NCVTest.hpp
index d08044db0..b79c55e08 100644
--- a/modules/gpunvidia/test/NCVTest.hpp
+++ b/modules/gpulegacy/test/NCVTest.hpp
@@ -56,7 +56,7 @@
 
 #include <cuda_runtime.h>
 
-#include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpulegacy.hpp"
 
 
 struct NCVTestReport
diff --git a/modules/gpunvidia/test/NCVTestSourceProvider.hpp b/modules/gpulegacy/test/NCVTestSourceProvider.hpp
similarity index 99%
rename from modules/gpunvidia/test/NCVTestSourceProvider.hpp
rename to modules/gpulegacy/test/NCVTestSourceProvider.hpp
index 38b9d814c..f2ffae56b 100644
--- a/modules/gpunvidia/test/NCVTestSourceProvider.hpp
+++ b/modules/gpulegacy/test/NCVTestSourceProvider.hpp
@@ -46,7 +46,7 @@
 #include <memory>
 
 #include "opencv2/highgui.hpp"
-#include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpulegacy.hpp"
 
 
 template <class T>
diff --git a/modules/gpunvidia/test/TestCompact.cpp b/modules/gpulegacy/test/TestCompact.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestCompact.cpp
rename to modules/gpulegacy/test/TestCompact.cpp
diff --git a/modules/gpunvidia/test/TestCompact.h b/modules/gpulegacy/test/TestCompact.h
similarity index 100%
rename from modules/gpunvidia/test/TestCompact.h
rename to modules/gpulegacy/test/TestCompact.h
diff --git a/modules/gpunvidia/test/TestDrawRects.cpp b/modules/gpulegacy/test/TestDrawRects.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestDrawRects.cpp
rename to modules/gpulegacy/test/TestDrawRects.cpp
diff --git a/modules/gpunvidia/test/TestDrawRects.h b/modules/gpulegacy/test/TestDrawRects.h
similarity index 100%
rename from modules/gpunvidia/test/TestDrawRects.h
rename to modules/gpulegacy/test/TestDrawRects.h
diff --git a/modules/gpunvidia/test/TestHaarCascadeApplication.cpp b/modules/gpulegacy/test/TestHaarCascadeApplication.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestHaarCascadeApplication.cpp
rename to modules/gpulegacy/test/TestHaarCascadeApplication.cpp
diff --git a/modules/gpunvidia/test/TestHaarCascadeApplication.h b/modules/gpulegacy/test/TestHaarCascadeApplication.h
similarity index 100%
rename from modules/gpunvidia/test/TestHaarCascadeApplication.h
rename to modules/gpulegacy/test/TestHaarCascadeApplication.h
diff --git a/modules/gpunvidia/test/TestHaarCascadeLoader.cpp b/modules/gpulegacy/test/TestHaarCascadeLoader.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestHaarCascadeLoader.cpp
rename to modules/gpulegacy/test/TestHaarCascadeLoader.cpp
diff --git a/modules/gpunvidia/test/TestHaarCascadeLoader.h b/modules/gpulegacy/test/TestHaarCascadeLoader.h
similarity index 100%
rename from modules/gpunvidia/test/TestHaarCascadeLoader.h
rename to modules/gpulegacy/test/TestHaarCascadeLoader.h
diff --git a/modules/gpunvidia/test/TestHypothesesFilter.cpp b/modules/gpulegacy/test/TestHypothesesFilter.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestHypothesesFilter.cpp
rename to modules/gpulegacy/test/TestHypothesesFilter.cpp
diff --git a/modules/gpunvidia/test/TestHypothesesFilter.h b/modules/gpulegacy/test/TestHypothesesFilter.h
similarity index 100%
rename from modules/gpunvidia/test/TestHypothesesFilter.h
rename to modules/gpulegacy/test/TestHypothesesFilter.h
diff --git a/modules/gpunvidia/test/TestHypothesesGrow.cpp b/modules/gpulegacy/test/TestHypothesesGrow.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestHypothesesGrow.cpp
rename to modules/gpulegacy/test/TestHypothesesGrow.cpp
diff --git a/modules/gpunvidia/test/TestHypothesesGrow.h b/modules/gpulegacy/test/TestHypothesesGrow.h
similarity index 100%
rename from modules/gpunvidia/test/TestHypothesesGrow.h
rename to modules/gpulegacy/test/TestHypothesesGrow.h
diff --git a/modules/gpunvidia/test/TestIntegralImage.cpp b/modules/gpulegacy/test/TestIntegralImage.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestIntegralImage.cpp
rename to modules/gpulegacy/test/TestIntegralImage.cpp
diff --git a/modules/gpunvidia/test/TestIntegralImage.h b/modules/gpulegacy/test/TestIntegralImage.h
similarity index 100%
rename from modules/gpunvidia/test/TestIntegralImage.h
rename to modules/gpulegacy/test/TestIntegralImage.h
diff --git a/modules/gpunvidia/test/TestIntegralImageSquared.cpp b/modules/gpulegacy/test/TestIntegralImageSquared.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestIntegralImageSquared.cpp
rename to modules/gpulegacy/test/TestIntegralImageSquared.cpp
diff --git a/modules/gpunvidia/test/TestIntegralImageSquared.h b/modules/gpulegacy/test/TestIntegralImageSquared.h
similarity index 100%
rename from modules/gpunvidia/test/TestIntegralImageSquared.h
rename to modules/gpulegacy/test/TestIntegralImageSquared.h
diff --git a/modules/gpunvidia/test/TestRectStdDev.cpp b/modules/gpulegacy/test/TestRectStdDev.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestRectStdDev.cpp
rename to modules/gpulegacy/test/TestRectStdDev.cpp
diff --git a/modules/gpunvidia/test/TestRectStdDev.h b/modules/gpulegacy/test/TestRectStdDev.h
similarity index 100%
rename from modules/gpunvidia/test/TestRectStdDev.h
rename to modules/gpulegacy/test/TestRectStdDev.h
diff --git a/modules/gpunvidia/test/TestResize.cpp b/modules/gpulegacy/test/TestResize.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestResize.cpp
rename to modules/gpulegacy/test/TestResize.cpp
diff --git a/modules/gpunvidia/test/TestResize.h b/modules/gpulegacy/test/TestResize.h
similarity index 100%
rename from modules/gpunvidia/test/TestResize.h
rename to modules/gpulegacy/test/TestResize.h
diff --git a/modules/gpunvidia/test/TestTranspose.cpp b/modules/gpulegacy/test/TestTranspose.cpp
similarity index 100%
rename from modules/gpunvidia/test/TestTranspose.cpp
rename to modules/gpulegacy/test/TestTranspose.cpp
diff --git a/modules/gpunvidia/test/TestTranspose.h b/modules/gpulegacy/test/TestTranspose.h
similarity index 100%
rename from modules/gpunvidia/test/TestTranspose.h
rename to modules/gpulegacy/test/TestTranspose.h
diff --git a/modules/gpunvidia/test/main_nvidia.cpp b/modules/gpulegacy/test/main_nvidia.cpp
similarity index 100%
rename from modules/gpunvidia/test/main_nvidia.cpp
rename to modules/gpulegacy/test/main_nvidia.cpp
diff --git a/modules/gpunvidia/test/main_test_nvidia.h b/modules/gpulegacy/test/main_test_nvidia.h
similarity index 100%
rename from modules/gpunvidia/test/main_test_nvidia.h
rename to modules/gpulegacy/test/main_test_nvidia.h
diff --git a/modules/gpunvidia/test/test_main.cpp b/modules/gpulegacy/test/test_main.cpp
similarity index 100%
rename from modules/gpunvidia/test/test_main.cpp
rename to modules/gpulegacy/test/test_main.cpp
diff --git a/modules/gpunvidia/test/test_nvidia.cpp b/modules/gpulegacy/test/test_nvidia.cpp
similarity index 96%
rename from modules/gpunvidia/test/test_nvidia.cpp
rename to modules/gpulegacy/test/test_nvidia.cpp
index d713b41bd..1cda187bc 100644
--- a/modules/gpunvidia/test/test_nvidia.cpp
+++ b/modules/gpulegacy/test/test_nvidia.cpp
@@ -140,13 +140,12 @@ GPU_TEST_P(NCV, HypothesesFiltration)
 
 GPU_TEST_P(NCV, Visualization)
 {
-    // this functionality doesn't used in gpu module
     bool res = nvidia_NCV_Visualization(_path, nvidiaTestOutputLevel);
 
     ASSERT_TRUE(res);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_NVidia, NPPST, ALL_DEVICES);
-INSTANTIATE_TEST_CASE_P(GPU_NVidia, NCV, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Legacy, NPPST, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Legacy, NCV, ALL_DEVICES);
 
 #endif // HAVE_CUDA
diff --git a/modules/gpunvidia/test/test_precomp.cpp b/modules/gpulegacy/test/test_precomp.cpp
similarity index 100%
rename from modules/gpunvidia/test/test_precomp.cpp
rename to modules/gpulegacy/test/test_precomp.cpp
diff --git a/modules/gpunvidia/test/test_precomp.hpp b/modules/gpulegacy/test/test_precomp.hpp
similarity index 99%
rename from modules/gpunvidia/test/test_precomp.hpp
rename to modules/gpulegacy/test/test_precomp.hpp
index 46acfc2ec..0de044acb 100644
--- a/modules/gpunvidia/test/test_precomp.hpp
+++ b/modules/gpulegacy/test/test_precomp.hpp
@@ -69,7 +69,7 @@
 #include "opencv2/ts/gpu_test.hpp"
 
 #include "opencv2/core/gpumat.hpp"
-#include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpulegacy.hpp"
 #include "opencv2/highgui.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
diff --git a/modules/gpunvidia/CMakeLists.txt b/modules/gpunvidia/CMakeLists.txt
deleted file mode 100644
index 3f4e4f6a6..000000000
--- a/modules/gpunvidia/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-if(NOT HAVE_CUDA)
-  ocv_module_disable(gpunvidia)
-endif()
-
-set(the_description "GPU-accelerated Computer Vision (HAL module)")
-
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
-
-ocv_define_module(gpunvidia opencv_core OPTIONAL opencv_objdetect)
diff --git a/modules/gpuobjdetect/CMakeLists.txt b/modules/gpuobjdetect/CMakeLists.txt
index 745c02b5b..5bce4d283 100644
--- a/modules/gpuobjdetect/CMakeLists.txt
+++ b/modules/gpuobjdetect/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Object Detection")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpuobjdetect opencv_objdetect opencv_gpuimgproc OPTIONAL opencv_gpunvidia)
+ocv_define_module(gpuobjdetect opencv_objdetect opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuobjdetect/src/precomp.hpp b/modules/gpuobjdetect/src/precomp.hpp
index 47058499d..003df2eca 100644
--- a/modules/gpuobjdetect/src/precomp.hpp
+++ b/modules/gpuobjdetect/src/precomp.hpp
@@ -53,8 +53,8 @@
 
 #include "opencv2/opencv_modules.hpp"
 
-#ifdef HAVE_OPENCV_GPUNVIDIA
-#  include "opencv2/gpunvidia/private.hpp"
+#ifdef HAVE_OPENCV_GPULEGACY
+#  include "opencv2/gpulegacy/private.hpp"
 #endif
 
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuvideo/CMakeLists.txt b/modules/gpuvideo/CMakeLists.txt
index 3e4e4baef..4e6264aed 100644
--- a/modules/gpuvideo/CMakeLists.txt
+++ b/modules/gpuvideo/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Video Analysis")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpuvideo opencv_video opencv_legacy opencv_gpufilters opencv_gpuimgproc OPTIONAL opencv_gpunvidia)
+ocv_define_module(gpuvideo opencv_video opencv_legacy opencv_gpufilters opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuvideo/src/precomp.hpp b/modules/gpuvideo/src/precomp.hpp
index 276eb22fc..1e37cf77d 100644
--- a/modules/gpuvideo/src/precomp.hpp
+++ b/modules/gpuvideo/src/precomp.hpp
@@ -57,8 +57,8 @@
 
 #include "opencv2/opencv_modules.hpp"
 
-#ifdef HAVE_OPENCV_GPUNVIDIA
-#  include "opencv2/gpunvidia/private.hpp"
+#ifdef HAVE_OPENCV_GPULEGACY
+#  include "opencv2/gpulegacy/private.hpp"
 #endif
 
 #ifdef HAVE_CUDA
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 3bf506932..d69ccdc2e 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpunvidia opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpuobjdetect
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpulegacy opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpuobjdetect
                                      opencv_gpucalib3d)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp
index a4bc6a973..f8dd60d76 100644
--- a/samples/gpu/cascadeclassifier_nvidia_api.cpp
+++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp
@@ -11,7 +11,7 @@
 #include "opencv2/objdetect/objdetect_c.h"
 
 #ifdef HAVE_CUDA
-#include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpulegacy.hpp"
 #endif
 
 using namespace std;
diff --git a/samples/gpu/opticalflow_nvidia_api.cpp b/samples/gpu/opticalflow_nvidia_api.cpp
index e4fc93cd5..4e0863b46 100644
--- a/samples/gpu/opticalflow_nvidia_api.cpp
+++ b/samples/gpu/opticalflow_nvidia_api.cpp
@@ -16,7 +16,7 @@
 #include "opencv2/highgui/highgui_c.h"
 
 #ifdef HAVE_CUDA
-#include "opencv2/gpunvidia.hpp"
+#include "opencv2/gpulegacy.hpp"
 #endif
 
 #if !defined(HAVE_CUDA)

From 71eeaa72765a7147a8ac11bb21c5015148da2930 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 18:05:06 +0400
Subject: [PATCH 25/49] gpuarithm module fixes

---
 modules/gpuarithm/CMakeLists.txt              |  18 +-
 .../gpuarithm/include/opencv2/gpuarithm.hpp   |   5 +-
 modules/gpuarithm/perf/perf_core.cpp          | 171 +++++++++++-------
 modules/gpuarithm/perf/perf_precomp.hpp       |   9 +-
 modules/gpuarithm/src/arithm.cpp              |  76 +++++---
 modules/gpuarithm/src/matrix_reductions.cpp   |  26 +--
 modules/gpuarithm/src/precomp.hpp             |   2 +
 modules/gpuarithm/test/test_core.cpp          | 156 ++++++++++------
 modules/gpuarithm/test/test_precomp.hpp       |   9 +-
 modules/gpuarithm/test/test_threshold.cpp     |  93 ----------
 10 files changed, 295 insertions(+), 270 deletions(-)
 delete mode 100644 modules/gpuarithm/test/test_threshold.cpp

diff --git a/modules/gpuarithm/CMakeLists.txt b/modules/gpuarithm/CMakeLists.txt
index c5ce72304..4b09dc182 100644
--- a/modules/gpuarithm/CMakeLists.txt
+++ b/modules/gpuarithm/CMakeLists.txt
@@ -6,12 +6,24 @@ set(the_description "GPU-accelerated Operations on Matrices")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_gpulegacy opencv_imgproc)
+ocv_add_module(gpuarithm opencv_core OPTIONAL opencv_gpulegacy)
+
+ocv_module_include_directories()
+ocv_glob_module_sources()
+
+set(extra_libs "")
 
 if(HAVE_CUBLAS)
-  CUDA_ADD_CUBLAS_TO_TARGET(${the_module})
+  list(APPEND extra_libs ${CUDA_cublas_LIBRARY})
 endif()
 
 if(HAVE_CUFFT)
-  CUDA_ADD_CUFFT_TO_TARGET(${the_module})
+  list(APPEND extra_libs ${CUDA_cufft_LIBRARY})
 endif()
+
+ocv_create_module(${extra_libs})
+
+ocv_add_precompiled_headers(${the_module})
+
+ocv_add_accuracy_tests(DEPENDS_ON opencv_imgproc)
+ocv_add_perf_tests(DEPENDS_ON opencv_imgproc)
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
index f65d2ec55..2ebaf2af1 100644
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -43,6 +43,10 @@
 #ifndef __OPENCV_GPUARITHM_HPP__
 #define __OPENCV_GPUARITHM_HPP__
 
+#ifndef __cplusplus
+#  error gpuarithm.hpp header must be compiled as C++
+#endif
+
 #include "opencv2/core/gpumat.hpp"
 
 namespace cv { namespace gpu {
@@ -331,7 +335,6 @@ struct CV_EXPORTS ConvolveBuf
     static Size estimateBlockSize(Size result_size, Size templ_size);
 };
 
-
 //! computes convolution (or cross-correlation) of two images using discrete Fourier transform
 //! supports source images of 32FC1 type only
 //! result matrix will have 32FC1 type
diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp
index fd388edcd..8957d06d5 100644
--- a/modules/gpuarithm/perf/perf_core.cpp
+++ b/modules/gpuarithm/perf/perf_core.cpp
@@ -51,7 +51,7 @@ using namespace perf;
 //////////////////////////////////////////////////////////////////////
 // Merge
 
-PERF_TEST_P(Sz_Depth_Cn, Core_Merge,
+PERF_TEST_P(Sz_Depth_Cn, Merge,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH,
                     Values(2, 3, 4)))
@@ -92,7 +92,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Merge,
 //////////////////////////////////////////////////////////////////////
 // Split
 
-PERF_TEST_P(Sz_Depth_Cn, Core_Split,
+PERF_TEST_P(Sz_Depth_Cn, Split,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH,
                     Values(2, 3, 4)))
@@ -134,7 +134,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Split,
 //////////////////////////////////////////////////////////////////////
 // AddMat
 
-PERF_TEST_P(Sz_Depth, Core_AddMat,
+PERF_TEST_P(Sz_Depth, AddMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -170,7 +170,7 @@ PERF_TEST_P(Sz_Depth, Core_AddMat,
 //////////////////////////////////////////////////////////////////////
 // AddScalar
 
-PERF_TEST_P(Sz_Depth, Core_AddScalar,
+PERF_TEST_P(Sz_Depth, AddScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -205,7 +205,7 @@ PERF_TEST_P(Sz_Depth, Core_AddScalar,
 //////////////////////////////////////////////////////////////////////
 // SubtractMat
 
-PERF_TEST_P(Sz_Depth, Core_SubtractMat,
+PERF_TEST_P(Sz_Depth, SubtractMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -241,7 +241,7 @@ PERF_TEST_P(Sz_Depth, Core_SubtractMat,
 //////////////////////////////////////////////////////////////////////
 // SubtractScalar
 
-PERF_TEST_P(Sz_Depth, Core_SubtractScalar,
+PERF_TEST_P(Sz_Depth, SubtractScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -276,7 +276,7 @@ PERF_TEST_P(Sz_Depth, Core_SubtractScalar,
 //////////////////////////////////////////////////////////////////////
 // MultiplyMat
 
-PERF_TEST_P(Sz_Depth, Core_MultiplyMat,
+PERF_TEST_P(Sz_Depth, MultiplyMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -312,7 +312,7 @@ PERF_TEST_P(Sz_Depth, Core_MultiplyMat,
 //////////////////////////////////////////////////////////////////////
 // MultiplyScalar
 
-PERF_TEST_P(Sz_Depth, Core_MultiplyScalar,
+PERF_TEST_P(Sz_Depth, MultiplyScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -347,7 +347,7 @@ PERF_TEST_P(Sz_Depth, Core_MultiplyScalar,
 //////////////////////////////////////////////////////////////////////
 // DivideMat
 
-PERF_TEST_P(Sz_Depth, Core_DivideMat,
+PERF_TEST_P(Sz_Depth, DivideMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -383,7 +383,7 @@ PERF_TEST_P(Sz_Depth, Core_DivideMat,
 //////////////////////////////////////////////////////////////////////
 // DivideScalar
 
-PERF_TEST_P(Sz_Depth, Core_DivideScalar,
+PERF_TEST_P(Sz_Depth, DivideScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -418,7 +418,7 @@ PERF_TEST_P(Sz_Depth, Core_DivideScalar,
 //////////////////////////////////////////////////////////////////////
 // DivideScalarInv
 
-PERF_TEST_P(Sz_Depth, Core_DivideScalarInv,
+PERF_TEST_P(Sz_Depth, DivideScalarInv,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -453,7 +453,7 @@ PERF_TEST_P(Sz_Depth, Core_DivideScalarInv,
 //////////////////////////////////////////////////////////////////////
 // AbsDiffMat
 
-PERF_TEST_P(Sz_Depth, Core_AbsDiffMat,
+PERF_TEST_P(Sz_Depth, AbsDiffMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -489,7 +489,7 @@ PERF_TEST_P(Sz_Depth, Core_AbsDiffMat,
 //////////////////////////////////////////////////////////////////////
 // AbsDiffScalar
 
-PERF_TEST_P(Sz_Depth, Core_AbsDiffScalar,
+PERF_TEST_P(Sz_Depth, AbsDiffScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH))
 {
@@ -524,7 +524,7 @@ PERF_TEST_P(Sz_Depth, Core_AbsDiffScalar,
 //////////////////////////////////////////////////////////////////////
 // Abs
 
-PERF_TEST_P(Sz_Depth, Core_Abs,
+PERF_TEST_P(Sz_Depth, Abs,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_16S, CV_32F)))
 {
@@ -552,7 +552,7 @@ PERF_TEST_P(Sz_Depth, Core_Abs,
 //////////////////////////////////////////////////////////////////////
 // Sqr
 
-PERF_TEST_P(Sz_Depth, Core_Sqr,
+PERF_TEST_P(Sz_Depth, Sqr,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16S, CV_32F)))
 {
@@ -580,7 +580,7 @@ PERF_TEST_P(Sz_Depth, Core_Sqr,
 //////////////////////////////////////////////////////////////////////
 // Sqrt
 
-PERF_TEST_P(Sz_Depth, Core_Sqrt,
+PERF_TEST_P(Sz_Depth, Sqrt,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16S, CV_32F)))
 {
@@ -612,7 +612,7 @@ PERF_TEST_P(Sz_Depth, Core_Sqrt,
 //////////////////////////////////////////////////////////////////////
 // Log
 
-PERF_TEST_P(Sz_Depth, Core_Log,
+PERF_TEST_P(Sz_Depth, Log,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16S, CV_32F)))
 {
@@ -644,7 +644,7 @@ PERF_TEST_P(Sz_Depth, Core_Log,
 //////////////////////////////////////////////////////////////////////
 // Exp
 
-PERF_TEST_P(Sz_Depth, Core_Exp,
+PERF_TEST_P(Sz_Depth, Exp,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16S, CV_32F)))
 {
@@ -678,7 +678,7 @@ PERF_TEST_P(Sz_Depth, Core_Exp,
 
 DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double);
 
-PERF_TEST_P(Sz_Depth_Power, Core_Pow,
+PERF_TEST_P(Sz_Depth_Power, Pow,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16S, CV_32F),
                     Values(0.3, 2.0, 2.4)))
@@ -716,7 +716,7 @@ CV_ENUM(CmpCode, CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE)
 
 DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CmpCode);
 
-PERF_TEST_P(Sz_Depth_Code, Core_CompareMat,
+PERF_TEST_P(Sz_Depth_Code, CompareMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH,
                     CmpCode::all()))
@@ -754,7 +754,7 @@ PERF_TEST_P(Sz_Depth_Code, Core_CompareMat,
 //////////////////////////////////////////////////////////////////////
 // CompareScalar
 
-PERF_TEST_P(Sz_Depth_Code, Core_CompareScalar,
+PERF_TEST_P(Sz_Depth_Code, CompareScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     ARITHM_MAT_DEPTH,
                     CmpCode::all()))
@@ -791,7 +791,7 @@ PERF_TEST_P(Sz_Depth_Code, Core_CompareScalar,
 //////////////////////////////////////////////////////////////////////
 // BitwiseNot
 
-PERF_TEST_P(Sz_Depth, Core_BitwiseNot,
+PERF_TEST_P(Sz_Depth, BitwiseNot,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S)))
 {
@@ -823,7 +823,7 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseNot,
 //////////////////////////////////////////////////////////////////////
 // BitwiseAndMat
 
-PERF_TEST_P(Sz_Depth, Core_BitwiseAndMat,
+PERF_TEST_P(Sz_Depth, BitwiseAndMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S)))
 {
@@ -859,7 +859,7 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseAndMat,
 //////////////////////////////////////////////////////////////////////
 // BitwiseAndScalar
 
-PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar,
+PERF_TEST_P(Sz_Depth_Cn, BitwiseAndScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S),
                     GPU_CHANNELS_1_3_4))
@@ -899,7 +899,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar,
 //////////////////////////////////////////////////////////////////////
 // BitwiseOrMat
 
-PERF_TEST_P(Sz_Depth, Core_BitwiseOrMat,
+PERF_TEST_P(Sz_Depth, BitwiseOrMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S)))
 {
@@ -935,7 +935,7 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseOrMat,
 //////////////////////////////////////////////////////////////////////
 // BitwiseOrScalar
 
-PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar,
+PERF_TEST_P(Sz_Depth_Cn, BitwiseOrScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S),
                     GPU_CHANNELS_1_3_4))
@@ -975,7 +975,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar,
 //////////////////////////////////////////////////////////////////////
 // BitwiseXorMat
 
-PERF_TEST_P(Sz_Depth, Core_BitwiseXorMat,
+PERF_TEST_P(Sz_Depth, BitwiseXorMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S)))
 {
@@ -1011,7 +1011,7 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseXorMat,
 //////////////////////////////////////////////////////////////////////
 // BitwiseXorScalar
 
-PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar,
+PERF_TEST_P(Sz_Depth_Cn, BitwiseXorScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S),
                     GPU_CHANNELS_1_3_4))
@@ -1051,7 +1051,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar,
 //////////////////////////////////////////////////////////////////////
 // RShift
 
-PERF_TEST_P(Sz_Depth_Cn, Core_RShift,
+PERF_TEST_P(Sz_Depth_Cn, RShift,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S),
                     GPU_CHANNELS_1_3_4))
@@ -1085,7 +1085,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_RShift,
 //////////////////////////////////////////////////////////////////////
 // LShift
 
-PERF_TEST_P(Sz_Depth_Cn, Core_LShift,
+PERF_TEST_P(Sz_Depth_Cn, LShift,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S),
                     GPU_CHANNELS_1_3_4))
@@ -1119,7 +1119,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_LShift,
 //////////////////////////////////////////////////////////////////////
 // MinMat
 
-PERF_TEST_P(Sz_Depth, Core_MinMat,
+PERF_TEST_P(Sz_Depth, MinMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F)))
 {
@@ -1155,7 +1155,7 @@ PERF_TEST_P(Sz_Depth, Core_MinMat,
 //////////////////////////////////////////////////////////////////////
 // MinScalar
 
-PERF_TEST_P(Sz_Depth, Core_MinScalar,
+PERF_TEST_P(Sz_Depth, MinScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F)))
 {
@@ -1190,7 +1190,7 @@ PERF_TEST_P(Sz_Depth, Core_MinScalar,
 //////////////////////////////////////////////////////////////////////
 // MaxMat
 
-PERF_TEST_P(Sz_Depth, Core_MaxMat,
+PERF_TEST_P(Sz_Depth, MaxMat,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F)))
 {
@@ -1226,7 +1226,7 @@ PERF_TEST_P(Sz_Depth, Core_MaxMat,
 //////////////////////////////////////////////////////////////////////
 // MaxScalar
 
-PERF_TEST_P(Sz_Depth, Core_MaxScalar,
+PERF_TEST_P(Sz_Depth, MaxScalar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F)))
 {
@@ -1263,7 +1263,7 @@ PERF_TEST_P(Sz_Depth, Core_MaxScalar,
 
 DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth);
 
-PERF_TEST_P(Sz_3Depth, Core_AddWeighted,
+PERF_TEST_P(Sz_3Depth, AddWeighted,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F, CV_64F),
                     Values(CV_8U, CV_16U, CV_32F, CV_64F),
@@ -1310,7 +1310,7 @@ CV_FLAGS(GemmFlags, 0, GEMM_1_T, GEMM_2_T, GEMM_3_T)
 
 DEF_PARAM_TEST(Sz_Type_Flags, cv::Size, MatType, GemmFlags);
 
-PERF_TEST_P(Sz_Type_Flags, Core_GEMM,
+PERF_TEST_P(Sz_Type_Flags, GEMM,
             Combine(Values(cv::Size(512, 512), cv::Size(1024, 1024)),
                     Values(CV_32FC1, CV_32FC2, CV_64FC1),
                     ALL_GEMM_FLAGS))
@@ -1356,7 +1356,7 @@ PERF_TEST_P(Sz_Type_Flags, Core_GEMM,
 //////////////////////////////////////////////////////////////////////
 // Transpose
 
-PERF_TEST_P(Sz_Type, Core_Transpose,
+PERF_TEST_P(Sz_Type, Transpose,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_64FC1)))
 {
@@ -1393,7 +1393,7 @@ CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Code, cv::Size, MatDepth, MatCn, FlipCode);
 
-PERF_TEST_P(Sz_Depth_Cn_Code, Core_Flip,
+PERF_TEST_P(Sz_Depth_Cn_Code, Flip,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F),
                     GPU_CHANNELS_1_3_4,
@@ -1431,7 +1431,7 @@ PERF_TEST_P(Sz_Depth_Cn_Code, Core_Flip,
 //////////////////////////////////////////////////////////////////////
 // LutOneChannel
 
-PERF_TEST_P(Sz_Type, Core_LutOneChannel,
+PERF_TEST_P(Sz_Type, LutOneChannel,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8UC1, CV_8UC3)))
 {
@@ -1466,7 +1466,7 @@ PERF_TEST_P(Sz_Type, Core_LutOneChannel,
 //////////////////////////////////////////////////////////////////////
 // LutMultiChannel
 
-PERF_TEST_P(Sz_Type, Core_LutMultiChannel,
+PERF_TEST_P(Sz_Type, LutMultiChannel,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values<MatType>(CV_8UC3)))
 {
@@ -1501,7 +1501,7 @@ PERF_TEST_P(Sz_Type, Core_LutMultiChannel,
 //////////////////////////////////////////////////////////////////////
 // MagnitudeComplex
 
-PERF_TEST_P(Sz, Core_MagnitudeComplex,
+PERF_TEST_P(Sz, MagnitudeComplex,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -1534,7 +1534,7 @@ PERF_TEST_P(Sz, Core_MagnitudeComplex,
 //////////////////////////////////////////////////////////////////////
 // MagnitudeSqrComplex
 
-PERF_TEST_P(Sz, Core_MagnitudeSqrComplex,
+PERF_TEST_P(Sz, MagnitudeSqrComplex,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -1560,7 +1560,7 @@ PERF_TEST_P(Sz, Core_MagnitudeSqrComplex,
 //////////////////////////////////////////////////////////////////////
 // Magnitude
 
-PERF_TEST_P(Sz, Core_Magnitude,
+PERF_TEST_P(Sz, Magnitude,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -1594,7 +1594,7 @@ PERF_TEST_P(Sz, Core_Magnitude,
 //////////////////////////////////////////////////////////////////////
 // MagnitudeSqr
 
-PERF_TEST_P(Sz, Core_MagnitudeSqr,
+PERF_TEST_P(Sz, MagnitudeSqr,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -1626,7 +1626,7 @@ PERF_TEST_P(Sz, Core_MagnitudeSqr,
 
 DEF_PARAM_TEST(Sz_AngleInDegrees, cv::Size, bool);
 
-PERF_TEST_P(Sz_AngleInDegrees, Core_Phase,
+PERF_TEST_P(Sz_AngleInDegrees, Phase,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Bool()))
 {
@@ -1662,7 +1662,7 @@ PERF_TEST_P(Sz_AngleInDegrees, Core_Phase,
 //////////////////////////////////////////////////////////////////////
 // CartToPolar
 
-PERF_TEST_P(Sz_AngleInDegrees, Core_CartToPolar,
+PERF_TEST_P(Sz_AngleInDegrees, CartToPolar,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Bool()))
 {
@@ -1702,7 +1702,7 @@ PERF_TEST_P(Sz_AngleInDegrees, Core_CartToPolar,
 //////////////////////////////////////////////////////////////////////
 // PolarToCart
 
-PERF_TEST_P(Sz_AngleInDegrees, Core_PolarToCart,
+PERF_TEST_P(Sz_AngleInDegrees, PolarToCart,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Bool()))
 {
@@ -1742,7 +1742,7 @@ PERF_TEST_P(Sz_AngleInDegrees, Core_PolarToCart,
 //////////////////////////////////////////////////////////////////////
 // MeanStdDev
 
-PERF_TEST_P(Sz, Core_MeanStdDev,
+PERF_TEST_P(Sz, MeanStdDev,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -1780,7 +1780,7 @@ PERF_TEST_P(Sz, Core_MeanStdDev,
 
 DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType);
 
-PERF_TEST_P(Sz_Depth_Norm, Core_Norm,
+PERF_TEST_P(Sz_Depth_Norm, Norm,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32S, CV_32F),
                     Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
@@ -1820,7 +1820,7 @@ PERF_TEST_P(Sz_Depth_Norm, Core_Norm,
 
 DEF_PARAM_TEST(Sz_Norm, cv::Size, NormType);
 
-PERF_TEST_P(Sz_Norm, Core_NormDiff,
+PERF_TEST_P(Sz_Norm, NormDiff,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
 {
@@ -1857,7 +1857,7 @@ PERF_TEST_P(Sz_Norm, Core_NormDiff,
 //////////////////////////////////////////////////////////////////////
 // Sum
 
-PERF_TEST_P(Sz_Depth_Cn, Core_Sum,
+PERF_TEST_P(Sz_Depth_Cn, Sum,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F),
                     GPU_CHANNELS_1_3_4))
@@ -1894,7 +1894,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Sum,
 //////////////////////////////////////////////////////////////////////
 // SumAbs
 
-PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs,
+PERF_TEST_P(Sz_Depth_Cn, SumAbs,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F),
                     GPU_CHANNELS_1_3_4))
@@ -1927,7 +1927,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs,
 //////////////////////////////////////////////////////////////////////
 // SumSqr
 
-PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr,
+PERF_TEST_P(Sz_Depth_Cn, SumSqr,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values<MatDepth>(CV_8U, CV_16U, CV_32F),
                     GPU_CHANNELS_1_3_4))
@@ -1960,7 +1960,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr,
 //////////////////////////////////////////////////////////////////////
 // MinMax
 
-PERF_TEST_P(Sz_Depth, Core_MinMax,
+PERF_TEST_P(Sz_Depth, MinMax,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
@@ -1998,7 +1998,7 @@ PERF_TEST_P(Sz_Depth, Core_MinMax,
 //////////////////////////////////////////////////////////////////////
 // MinMaxLoc
 
-PERF_TEST_P(Sz_Depth, Core_MinMaxLoc,
+PERF_TEST_P(Sz_Depth, MinMaxLoc,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
@@ -2038,7 +2038,7 @@ PERF_TEST_P(Sz_Depth, Core_MinMaxLoc,
 //////////////////////////////////////////////////////////////////////
 // CountNonZero
 
-PERF_TEST_P(Sz_Depth, Core_CountNonZero,
+PERF_TEST_P(Sz_Depth, CountNonZero,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
@@ -2077,7 +2077,7 @@ CV_ENUM(ReduceDim, Rows, Cols)
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim);
 
-PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce,
+PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Reduce,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_16S, CV_32F),
                     Values(1, 2, 3, 4),
@@ -2118,7 +2118,7 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce,
 
 DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType);
 
-PERF_TEST_P(Sz_Depth_NormType, Core_Normalize,
+PERF_TEST_P(Sz_Depth_NormType, Normalize,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F, CV_64F),
                     Values(NormType(cv::NORM_INF),
@@ -2163,7 +2163,7 @@ CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMP
 
 DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags);
 
-PERF_TEST_P(Sz_Flags, ImgProc_MulSpectrums,
+PERF_TEST_P(Sz_Flags, MulSpectrums,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(0, DftFlags(cv::DFT_ROWS))))
 {
@@ -2197,7 +2197,7 @@ PERF_TEST_P(Sz_Flags, ImgProc_MulSpectrums,
 //////////////////////////////////////////////////////////////////////
 // MulAndScaleSpectrums
 
-PERF_TEST_P(Sz, ImgProc_MulAndScaleSpectrums,
+PERF_TEST_P(Sz, MulAndScaleSpectrums,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -2227,7 +2227,7 @@ PERF_TEST_P(Sz, ImgProc_MulAndScaleSpectrums,
 //////////////////////////////////////////////////////////////////////
 // Dft
 
-PERF_TEST_P(Sz_Flags, ImgProc_Dft,
+PERF_TEST_P(Sz_Flags, Dft,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))))
 {
@@ -2258,14 +2258,12 @@ PERF_TEST_P(Sz_Flags, ImgProc_Dft,
     }
 }
 
-#ifdef HAVE_OPENCV_IMGPROC
-
 //////////////////////////////////////////////////////////////////////
 // CopyMakeBorder
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode);
 
-PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder,
+PERF_TEST_P(Sz_Depth_Cn_Border, CopyMakeBorder,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16U, CV_32F),
                     GPU_CHANNELS_1_3_4,
@@ -2303,7 +2301,7 @@ PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder,
 //////////////////////////////////////////////////////////////////////
 // Integral
 
-PERF_TEST_P(Sz, ImgProc_Integral,
+PERF_TEST_P(Sz, Integral,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -2334,7 +2332,7 @@ PERF_TEST_P(Sz, ImgProc_Integral,
 //////////////////////////////////////////////////////////////////////
 // IntegralSqr
 
-PERF_TEST_P(Sz, ImgProc_IntegralSqr,
+PERF_TEST_P(Sz, IntegralSqr,
             GPU_TYPICAL_MAT_SIZES)
 {
     const cv::Size size = GetParam();
@@ -2362,7 +2360,7 @@ PERF_TEST_P(Sz, ImgProc_IntegralSqr,
 
 DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool);
 
-PERF_TEST_P(Sz_KernelSz_Ccorr, ImgProc_Convolve,
+PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(17, 27, 32, 64),
                     Bool()))
@@ -2405,4 +2403,41 @@ PERF_TEST_P(Sz_KernelSz_Ccorr, ImgProc_Convolve,
     }
 }
 
-#endif
+//////////////////////////////////////////////////////////////////////
+// Threshold
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+#define ALL_THRESH_OPS ValuesIn(ThreshOp::all())
+
+DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp);
+
+PERF_TEST_P(Sz_Depth_Op, Threshold,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    ALL_THRESH_OPS))
+{
+    // Parameter tuple: (matrix size, element depth, threshold operation).
+    const cv::Size matSize = GET_PARAM(0);
+    const int matDepth = GET_PARAM(1);
+    const int op = GET_PARAM(2);
+    const double thresh = 100.0, maxVal = 255.0;
+
+    cv::Mat src(matSize, matDepth);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::threshold(d_src, dst, thresh, maxVal, op);
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::threshold(src, dst, thresh, maxVal, op);
+        CPU_SANITY_CHECK(dst);
+    }
+}
diff --git a/modules/gpuarithm/perf/perf_precomp.hpp b/modules/gpuarithm/perf/perf_precomp.hpp
index bee378064..0b50ab0f9 100644
--- a/modules/gpuarithm/perf/perf_precomp.hpp
+++ b/modules/gpuarithm/perf/perf_precomp.hpp
@@ -54,14 +54,9 @@
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_perf.hpp"
 
-#include "opencv2/core.hpp"
 #include "opencv2/gpuarithm.hpp"
-
-#include "opencv2/opencv_modules.hpp"
-
-#ifdef HAVE_OPENCV_IMGPROC
-#  include "opencv2/imgproc.hpp"
-#endif
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index f5af24ee2..cc85cc72a 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -234,7 +234,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
         {
             if (tr3)
             {
-                transpose(src3, dst, stream);
+                gpu::transpose(src3, dst, stream);
             }
             else
             {
@@ -638,13 +638,13 @@ void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int
     {
         double smin = 0, smax = 0;
         double dmin = std::min(a, b), dmax = std::max(a, b);
-        minMax(src, &smin, &smax, mask, norm_buf);
+        gpu::minMax(src, &smin, &smax, mask, norm_buf);
         scale = (dmax - dmin) * (smax - smin > std::numeric_limits<double>::epsilon() ? 1.0 / (smax - smin) : 0.0);
         shift = dmin - smin * scale;
     }
     else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF)
     {
-        scale = norm(src, norm_type, mask, norm_buf);
+        scale = gpu::norm(src, norm_type, mask, norm_buf);
         scale = scale > std::numeric_limits<double>::epsilon() ? a / scale : 0.0;
         shift = 0;
     }
@@ -779,7 +779,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)
 {
     GpuMat buffer;
-    integralBuffered(src, sum, buffer, s);
+    gpu::integralBuffered(src, sum, buffer, s);
 }
 
 namespace cv { namespace gpu { namespace cudev
@@ -891,6 +891,8 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
 //////////////////////////////////////////////////////////////////////////////
 // mulSpectrums
 
+#ifdef HAVE_CUFFT
+
 namespace cv { namespace gpu { namespace cudev
 {
     void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
@@ -898,9 +900,20 @@ namespace cv { namespace gpu { namespace cudev
     void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
 }}}
 
+#endif
+
 void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream)
 {
-    (void)flags;
+#ifndef HAVE_CUFFT
+    (void) a;
+    (void) b;
+    (void) c;
+    (void) flags;
+    (void) conjB;
+    (void) stream;
+    throw_no_cuda();
+#else
+    (void) flags;
 
     typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);
 
@@ -913,11 +926,14 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag
 
     Caller caller = callers[(int)conjB];
     caller(a, b, c, StreamAccessor::getStream(stream));
+#endif
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // mulAndScaleSpectrums
 
+#ifdef HAVE_CUFFT
+
 namespace cv { namespace gpu { namespace cudev
 {
     void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
@@ -925,8 +941,20 @@ namespace cv { namespace gpu { namespace cudev
     void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
 }}}
 
+#endif
+
 void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream)
 {
+#ifndef HAVE_CUFFT
+    (void) a;
+    (void) b;
+    (void) c;
+    (void) flags;
+    (void) scale;
+    (void) conjB;
+    (void) stream;
+    throw_no_cuda();
+#else
     (void)flags;
 
     typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);
@@ -939,6 +967,7 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,
 
     Caller caller = callers[(int)conjB];
     caller(a, b, scale, c, StreamAccessor::getStream(stream));
+#endif
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -947,15 +976,12 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,
 void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stream& stream)
 {
 #ifndef HAVE_CUFFT
-
-    OPENCV_GPU_UNUSED(src);
-    OPENCV_GPU_UNUSED(dst);
-    OPENCV_GPU_UNUSED(dft_size);
-    OPENCV_GPU_UNUSED(flags);
-    OPENCV_GPU_UNUSED(stream);
-
+    (void) src;
+    (void) dst;
+    (void) dft_size;
+    (void) flags;
+    (void) stream;
     throw_no_cuda();
-
 #else
 
     CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
@@ -1094,16 +1120,22 @@ Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size
 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr)
 {
     ConvolveBuf buf;
-    convolve(image, templ, result, ccorr, buf);
+    gpu::convolve(image, templ, result, ccorr, buf);
 }
 
 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
 {
-    using namespace ::cv::gpu::cudev::imgproc;
-
 #ifndef HAVE_CUFFT
+    (void) image;
+    (void) templ;
+    (void) result;
+    (void) ccorr;
+    (void) buf;
+    (void) stream;
     throw_no_cuda();
 #else
+    using namespace cv::gpu::cudev::imgproc;
+
     CV_Assert(image.type() == CV_32F);
     CV_Assert(templ.type() == CV_32F);
 
@@ -1129,8 +1161,8 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
     cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );
 
     GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);
-    copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
-                   templ_block.cols - templ_roi.cols, 0, Scalar(), stream);
+    gpu::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
+                        templ_block.cols - templ_roi.cols, 0, Scalar(), stream);
 
     cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(),
                                templ_spect.ptr<cufftComplex>()));
@@ -1144,13 +1176,13 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
                                 std::min(y + dft_size.height, image.rows) - y);
             GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
                              image.step);
-            copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
-                           0, image_block.cols - image_roi.cols, 0, Scalar(), stream);
+            gpu::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
+                                0, image_block.cols - image_roi.cols, 0, Scalar(), stream);
 
             cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
                                        image_spect.ptr<cufftComplex>()));
-            mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
-                                 1.f / dft_size.area(), ccorr, stream);
+            gpu::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
+                                      1.f / dft_size.area(), ccorr, stream);
             cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
                                        result_data.ptr<cufftReal>()));
 
diff --git a/modules/gpuarithm/src/matrix_reductions.cpp b/modules/gpuarithm/src/matrix_reductions.cpp
index dbb6c0945..6ffde1722 100644
--- a/modules/gpuarithm/src/matrix_reductions.cpp
+++ b/modules/gpuarithm/src/matrix_reductions.cpp
@@ -155,12 +155,12 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat
 double cv::gpu::norm(const GpuMat& src, int normType)
 {
     GpuMat buf;
-    return norm(src, normType, GpuMat(), buf);
+    return gpu::norm(src, normType, GpuMat(), buf);
 }
 
 double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf)
 {
-    return norm(src, normType, GpuMat(), buf);
+    return gpu::norm(src, normType, GpuMat(), buf);
 }
 
 double cv::gpu::norm(const GpuMat& src, int normType, const GpuMat& mask, GpuMat& buf)
@@ -171,14 +171,14 @@ double cv::gpu::norm(const GpuMat& src, int normType, const GpuMat& mask, GpuMat
     GpuMat src_single_channel = src.reshape(1);
 
     if (normType == NORM_L1)
-        return absSum(src_single_channel, mask, buf)[0];
+        return gpu::absSum(src_single_channel, mask, buf)[0];
 
     if (normType == NORM_L2)
-        return std::sqrt(sqrSum(src_single_channel, mask, buf)[0]);
+        return std::sqrt(gpu::sqrSum(src_single_channel, mask, buf)[0]);
 
     // NORM_INF
     double min_val, max_val;
-    minMax(src_single_channel, &min_val, &max_val, mask, buf);
+    gpu::minMax(src_single_channel, &min_val, &max_val, mask, buf);
     return std::max(std::abs(min_val), std::abs(max_val));
 }
 
@@ -232,12 +232,12 @@ namespace sum
 Scalar cv::gpu::sum(const GpuMat& src)
 {
     GpuMat buf;
-    return sum(src, GpuMat(), buf);
+    return gpu::sum(src, GpuMat(), buf);
 }
 
 Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
 {
-    return sum(src, GpuMat(), buf);
+    return gpu::sum(src, GpuMat(), buf);
 }
 
 Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
@@ -278,12 +278,12 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
 Scalar cv::gpu::absSum(const GpuMat& src)
 {
     GpuMat buf;
-    return absSum(src, GpuMat(), buf);
+    return gpu::absSum(src, GpuMat(), buf);
 }
 
 Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
 {
-    return absSum(src, GpuMat(), buf);
+    return gpu::absSum(src, GpuMat(), buf);
 }
 
 Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
@@ -324,12 +324,12 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
 Scalar cv::gpu::sqrSum(const GpuMat& src)
 {
     GpuMat buf;
-    return sqrSum(src, GpuMat(), buf);
+    return gpu::sqrSum(src, GpuMat(), buf);
 }
 
 Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
 {
-    return sqrSum(src, GpuMat(), buf);
+    return gpu::sqrSum(src, GpuMat(), buf);
 }
 
 Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
@@ -381,7 +381,7 @@ namespace minMax
 void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask)
 {
     GpuMat buf;
-    minMax(src, minVal, maxVal, mask, buf);
+    gpu::minMax(src, minVal, maxVal, mask, buf);
 }
 
 void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
@@ -431,7 +431,7 @@ namespace minMaxLoc
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
 {
     GpuMat valBuf, locBuf;
-    minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
+    gpu::minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
 }
 
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
diff --git a/modules/gpuarithm/src/precomp.hpp b/modules/gpuarithm/src/precomp.hpp
index 05493aa96..ce497eeba 100644
--- a/modules/gpuarithm/src/precomp.hpp
+++ b/modules/gpuarithm/src/precomp.hpp
@@ -45,6 +45,8 @@
 
 #include <limits>
 
+#include "cvconfig.h"
+
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/core_c.h"
diff --git a/modules/gpuarithm/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp
index dd8f854d8..aea7086a8 100644
--- a/modules/gpuarithm/test/test_core.cpp
+++ b/modules/gpuarithm/test/test_core.cpp
@@ -104,7 +104,7 @@ GPU_TEST_P(Merge, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Merge, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Merge, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -171,7 +171,7 @@ GPU_TEST_P(Split, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Split, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Split, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -237,7 +237,7 @@ GPU_TEST_P(Add_Array, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Array, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -299,7 +299,7 @@ GPU_TEST_P(Add_Array_Mask, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array_Mask, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Array_Mask, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -387,7 +387,7 @@ GPU_TEST_P(Add_Scalar, WithMask)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Scalar, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Scalar, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -452,7 +452,7 @@ GPU_TEST_P(Subtract_Array, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Array, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -514,7 +514,7 @@ GPU_TEST_P(Subtract_Array_Mask, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array_Mask, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Array_Mask, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -602,7 +602,7 @@ GPU_TEST_P(Subtract_Scalar, WithMask)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Scalar, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Scalar, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -696,7 +696,7 @@ GPU_TEST_P(Multiply_Array, WithScale)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Array, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Array, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -796,7 +796,7 @@ GPU_TEST_P(Multiply_Array_Special, Case_16SC4x_32FC1)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Array_Special, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Array_Special, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     WHOLE_SUBMAT));
@@ -882,7 +882,7 @@ GPU_TEST_P(Multiply_Scalar, WithScale)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Scalar, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -976,7 +976,7 @@ GPU_TEST_P(Divide_Array, WithScale)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Array, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Array, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -1076,7 +1076,7 @@ GPU_TEST_P(Divide_Array_Special, Case_16SC4x_32FC1)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Array_Special, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Array_Special, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     WHOLE_SUBMAT));
@@ -1161,7 +1161,7 @@ GPU_TEST_P(Divide_Scalar, WithScale)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -1217,7 +1217,7 @@ GPU_TEST_P(Divide_Scalar_Inv, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar_Inv, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar_Inv, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DEPTH_PAIRS,
@@ -1302,7 +1302,7 @@ GPU_TEST_P(AbsDiff, Scalar)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, AbsDiff, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, AbsDiff, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -1341,7 +1341,7 @@ GPU_TEST_P(Abs, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Abs, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)),
@@ -1381,7 +1381,7 @@ GPU_TEST_P(Sqr, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sqr, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U),
@@ -1451,7 +1451,7 @@ GPU_TEST_P(Sqrt, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sqrt, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U),
@@ -1521,7 +1521,7 @@ GPU_TEST_P(Log, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-6);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Log, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U),
@@ -1601,7 +1601,7 @@ GPU_TEST_P(Exp, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-2);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Exp, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U),
@@ -1664,7 +1664,7 @@ GPU_TEST_P(Compare_Array, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Array, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Compare_Array, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -1775,7 +1775,7 @@ GPU_TEST_P(Compare_Scalar, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Scalar, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Compare_Scalar, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     TYPES(CV_8U, CV_64F, 1, 4),
@@ -1847,7 +1847,7 @@ GPU_TEST_P(Bitwise_Array, Xor)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Array, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Bitwise_Array, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     TYPES(CV_8U, CV_32S, 1, 4)));
@@ -1913,7 +1913,7 @@ GPU_TEST_P(Bitwise_Scalar, Xor)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Scalar, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Bitwise_Scalar, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)),
@@ -1988,7 +1988,7 @@ GPU_TEST_P(RShift, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, RShift, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, RShift, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U),
@@ -2068,7 +2068,7 @@ GPU_TEST_P(LShift, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, LShift, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, LShift, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)),
@@ -2152,7 +2152,7 @@ GPU_TEST_P(Min, Scalar)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Min, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Min, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -2235,7 +2235,7 @@ GPU_TEST_P(Max, Scalar)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Max, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Max, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -2294,7 +2294,7 @@ GPU_TEST_P(Pow, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Pow, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -2357,7 +2357,7 @@ GPU_TEST_P(AddWeighted, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, AddWeighted, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, AddWeighted, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -2437,7 +2437,7 @@ GPU_TEST_P(GEMM, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, GEMM, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, GEMM, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_32FC1), MatType(CV_32FC2), MatType(CV_64FC1), MatType(CV_64FC2)),
@@ -2495,7 +2495,7 @@ GPU_TEST_P(Transpose, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Transpose, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Transpose, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1),
@@ -2547,7 +2547,7 @@ GPU_TEST_P(Flip, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Flip, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Flip, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1),
@@ -2614,7 +2614,7 @@ GPU_TEST_P(LUT, MultiChannel)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, LUT, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, LUT, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3)),
@@ -2699,7 +2699,7 @@ GPU_TEST_P(Magnitude, Sqr_Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-1);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Magnitude, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Magnitude, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     WHOLE_SUBMAT));
@@ -2744,7 +2744,7 @@ GPU_TEST_P(Phase, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, angleInDegrees ? 1e-2 : 1e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Phase, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Phase, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
@@ -2788,7 +2788,7 @@ GPU_TEST_P(CartToPolar, Accuracy)
     EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, CartToPolar, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, CartToPolar, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
@@ -2832,7 +2832,7 @@ GPU_TEST_P(PolarToCart, Accuracy)
     EXPECT_MAT_NEAR(y_gold, y, 1e-4);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, PolarToCart, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, PolarToCart, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
@@ -2889,7 +2889,7 @@ GPU_TEST_P(MeanStdDev, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, MeanStdDev, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, MeanStdDev, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     WHOLE_SUBMAT));
@@ -2930,7 +2930,7 @@ GPU_TEST_P(Norm, Accuracy)
     EXPECT_NEAR(val_gold, val, depth < CV_32F ? 0.0 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Norm, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Norm, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U),
@@ -2975,7 +2975,7 @@ GPU_TEST_P(NormDiff, Accuracy)
     EXPECT_NEAR(val_gold, val, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, NormDiff, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, NormDiff, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)),
@@ -3113,7 +3113,7 @@ GPU_TEST_P(Sum, Sqr)
     EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sum, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     TYPES(CV_8U, CV_64F, 1, 4),
@@ -3230,7 +3230,7 @@ GPU_TEST_P(MinMax, NullPtr)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, MinMax, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, MinMax, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -3396,7 +3396,7 @@ GPU_TEST_P(MinMaxLoc, NullPtr)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, MinMaxLoc, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, MinMaxLoc, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -3451,7 +3451,7 @@ GPU_TEST_P(CountNonZero, Accuracy)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, CountNonZero, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, CountNonZero, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -3529,7 +3529,7 @@ GPU_TEST_P(Reduce, Cols)
     EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 0.0 : 0.02);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Reduce, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatDepth(CV_8U),
@@ -3600,7 +3600,7 @@ GPU_TEST_P(Normalize, WithMask)
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Core, Normalize, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Normalize, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     ALL_DEPTH,
@@ -3658,7 +3658,7 @@ GPU_TEST_P(MulSpectrums, Scaled)
     EXPECT_MAT_NEAR(c_gold, c, 1e-2);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MulSpectrums, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, MulSpectrums, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
@@ -3791,7 +3791,7 @@ GPU_TEST_P(Dft, R2CThenC2R)
     testR2CThenC2R("single row 1", cols + 1, 1, true);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Dft, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Dft, ALL_DEVICES);
 
 ////////////////////////////////////////////////////////
 // Convolve
@@ -3873,14 +3873,12 @@ GPU_TEST_P(Convolve, Accuracy)
     EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Convolve, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Convolve, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(KSize(3), KSize(7), KSize(11), KSize(17), KSize(19), KSize(23), KSize(45)),
     testing::Values(Ccorr(false), Ccorr(true))));
 
-#ifdef HAVE_OPENCV_IMGPROC
-
 //////////////////////////////////////////////////////////////////////////////
 // CopyMakeBorder
 
@@ -3925,7 +3923,7 @@ GPU_TEST_P(CopyMakeBorder, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, CopyMakeBorder, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1),
@@ -3973,11 +3971,57 @@ GPU_TEST_P(Integral, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Integral, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Integral, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     WHOLE_SUBMAT));
 
-#endif
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// Threshold
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
+
+PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    int threshOp;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        threshOp = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Threshold, Accuracy)
+{
+    cv::Mat src = randomMat(size, type);
+    double maxVal = randomDouble(20.0, 127.0);
+    double thresh = randomDouble(0.0, maxVal);
+
+    cv::gpu::GpuMat dst = createMat(src.size(), src.type(), useRoi);
+    cv::gpu::threshold(loadMat(src, useRoi), dst, thresh, maxVal, threshOp);
+
+    cv::Mat dst_gold;
+    cv::threshold(src, dst_gold, thresh, maxVal, threshOp);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Threshold, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)),
+    ALL_THRESH_OPS,
+    WHOLE_SUBMAT));
 
 #endif // HAVE_CUDA
diff --git a/modules/gpuarithm/test/test_precomp.hpp b/modules/gpuarithm/test/test_precomp.hpp
index 800ed31c0..f9d3ad43c 100644
--- a/modules/gpuarithm/test/test_precomp.hpp
+++ b/modules/gpuarithm/test/test_precomp.hpp
@@ -54,13 +54,8 @@
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_test.hpp"
 
-#include "opencv2/core.hpp"
 #include "opencv2/gpuarithm.hpp"
-
-#include "opencv2/opencv_modules.hpp"
-
-#ifdef HAVE_OPENCV_IMGPROC
-#  include "opencv2/imgproc.hpp"
-#endif
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
 
 #endif
diff --git a/modules/gpuarithm/test/test_threshold.cpp b/modules/gpuarithm/test/test_threshold.cpp
deleted file mode 100644
index 52ebd7f59..000000000
--- a/modules/gpuarithm/test/test_threshold.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_CUDA
-
-using namespace cvtest;
-
-CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
-
-PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    int threshOp;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-        threshOp = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Threshold, Accuracy)
-{
-    cv::Mat src = randomMat(size, type);
-    double maxVal = randomDouble(20.0, 127.0);
-    double thresh = randomDouble(0.0, maxVal);
-
-    cv::gpu::GpuMat dst = createMat(src.size(), src.type(), useRoi);
-    cv::gpu::threshold(loadMat(src, useRoi), dst, thresh, maxVal, threshOp);
-
-    cv::Mat dst_gold;
-    cv::threshold(src, dst_gold, thresh, maxVal, threshOp);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Threshold, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)),
-    ThreshOp::all(),
-    WHOLE_SUBMAT));
-
-#endif // HAVE_CUDA

From fc3730fcc217070fc96cb0857e78fe1c6d8a100c Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:45:48 +0400
Subject: [PATCH 26/49] gpuwarping module for image warping

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/include/opencv2/gpu.hpp           |   1 +
 .../gpuarithm/doc/operations_on_matrices.rst  | 163 +++++
 modules/gpuarithm/perf/perf_core.cpp          |   2 +-
 modules/gpufeatures2d/CMakeLists.txt          |   2 +-
 modules/gpufeatures2d/src/precomp.hpp         |   3 +-
 modules/gpuimgproc/doc/image_processing.rst   | 429 +------------
 .../gpuimgproc/include/opencv2/gpuimgproc.hpp |  74 ---
 modules/gpuimgproc/perf/perf_imgproc.cpp      | 583 -----------------
 modules/gpuimgproc/src/cuda/imgproc.cu        | 166 -----
 modules/gpuimgproc/src/imgproc.cpp            | 194 +-----
 modules/gpuimgproc/src/precomp.hpp            |   2 +-
 modules/gpuimgproc/test/test_denoising.cpp    |  47 --
 modules/gpuimgproc/test/test_imgproc.cpp      |  47 ++
 modules/gpuimgproc/test/test_precomp.hpp      |   2 -
 modules/gpuobjdetect/CMakeLists.txt           |   2 +-
 modules/gpuobjdetect/src/precomp.hpp          |   1 +
 modules/gpuvideo/CMakeLists.txt               |   2 +-
 modules/gpuvideo/src/precomp.hpp              |   1 +
 modules/gpuwarping/CMakeLists.txt             |   9 +
 modules/gpuwarping/doc/gpuwarping.rst         |   8 +
 modules/gpuwarping/doc/warping.rst            | 251 ++++++++
 .../gpuwarping/include/opencv2/gpuwarping.hpp | 131 ++++
 modules/gpuwarping/perf/perf_main.cpp         |  47 ++
 modules/gpuwarping/perf/perf_precomp.cpp      |  43 ++
 modules/gpuwarping/perf/perf_precomp.hpp      |  64 ++
 modules/gpuwarping/perf/perf_warping.cpp      | 592 ++++++++++++++++++
 .../gpuwarping/src/cuda/build_warp_maps.cu    | 221 +++++++
 .../src/cuda/pyr_down.cu                      |   0
 .../src/cuda/pyr_up.cu                        |   0
 .../src/cuda/remap.cu                         |   0
 .../src/cuda/resize.cu                        |   0
 .../src/cuda/warp.cu                          |   0
 modules/gpuwarping/src/precomp.cpp            |  43 ++
 modules/gpuwarping/src/precomp.hpp            |  57 ++
 .../src/pyramids.cpp                          |  25 +-
 .../{gpuimgproc => gpuwarping}/src/remap.cpp  |   0
 .../{gpuimgproc => gpuwarping}/src/resize.cpp |  13 +-
 .../{gpuimgproc => gpuwarping}/src/warp.cpp   | 198 +++++-
 .../test/interpolation.hpp                    |   0
 modules/gpuwarping/test/test_main.cpp         |  45 ++
 modules/gpuwarping/test/test_precomp.cpp      |  43 ++
 modules/gpuwarping/test/test_precomp.hpp      |  62 ++
 .../test/test_pyramids.cpp                    |   4 +-
 .../test/test_remap.cpp                       |   2 +-
 .../test/test_resize.cpp                      |   6 +-
 .../test/test_warp_affine.cpp                 |   6 +-
 .../test/test_warp_perspective.cpp            |   6 +-
 samples/cpp/CMakeLists.txt                    |   1 +
 samples/gpu/CMakeLists.txt                    |   5 +-
 50 files changed, 2068 insertions(+), 1537 deletions(-)
 create mode 100644 modules/gpuwarping/CMakeLists.txt
 create mode 100644 modules/gpuwarping/doc/gpuwarping.rst
 create mode 100644 modules/gpuwarping/doc/warping.rst
 create mode 100644 modules/gpuwarping/include/opencv2/gpuwarping.hpp
 create mode 100644 modules/gpuwarping/perf/perf_main.cpp
 create mode 100644 modules/gpuwarping/perf/perf_precomp.cpp
 create mode 100644 modules/gpuwarping/perf/perf_precomp.hpp
 create mode 100644 modules/gpuwarping/perf/perf_warping.cpp
 create mode 100644 modules/gpuwarping/src/cuda/build_warp_maps.cu
 rename modules/{gpuimgproc => gpuwarping}/src/cuda/pyr_down.cu (100%)
 rename modules/{gpuimgproc => gpuwarping}/src/cuda/pyr_up.cu (100%)
 rename modules/{gpuimgproc => gpuwarping}/src/cuda/remap.cu (100%)
 rename modules/{gpuimgproc => gpuwarping}/src/cuda/resize.cu (100%)
 rename modules/{gpuimgproc => gpuwarping}/src/cuda/warp.cu (100%)
 create mode 100644 modules/gpuwarping/src/precomp.cpp
 create mode 100644 modules/gpuwarping/src/precomp.hpp
 rename modules/{gpuimgproc => gpuwarping}/src/pyramids.cpp (97%)
 rename modules/{gpuimgproc => gpuwarping}/src/remap.cpp (100%)
 rename modules/{gpuimgproc => gpuwarping}/src/resize.cpp (96%)
 rename modules/{gpuimgproc => gpuwarping}/src/warp.cpp (70%)
 rename modules/{gpuimgproc => gpuwarping}/test/interpolation.hpp (100%)
 create mode 100644 modules/gpuwarping/test/test_main.cpp
 create mode 100644 modules/gpuwarping/test/test_precomp.cpp
 create mode 100644 modules/gpuwarping/test/test_precomp.hpp
 rename modules/{gpuimgproc => gpuwarping}/test/test_pyramids.cpp (97%)
 rename modules/{gpuimgproc => gpuwarping}/test/test_remap.cpp (99%)
 rename modules/{gpuimgproc => gpuwarping}/test/test_resize.cpp (97%)
 rename modules/{gpuimgproc => gpuwarping}/test/test_warp_affine.cpp (98%)
 rename modules/{gpuimgproc => gpuwarping}/test/test_warp_perspective.cpp (98%)

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index de132cf9f..578957037 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -6,7 +6,7 @@ set(the_description "GPU-accelerated Computer Vision")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
-ocv_define_module(gpu opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc
+ocv_define_module(gpu opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
                       opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d opencv_gpuobjdetect)
 
 if(HAVE_CUDA)
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index e2f747806..10fbbd7d8 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -46,6 +46,7 @@
 #include "opencv2/core/gpumat.hpp"
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpufilters.hpp"
+#include "opencv2/gpuwarping.hpp"
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpufeatures2d.hpp"
 #include "opencv2/gpuvideo.hpp"
diff --git a/modules/gpuarithm/doc/operations_on_matrices.rst b/modules/gpuarithm/doc/operations_on_matrices.rst
index d1762f442..a25100728 100644
--- a/modules/gpuarithm/doc/operations_on_matrices.rst
+++ b/modules/gpuarithm/doc/operations_on_matrices.rst
@@ -272,3 +272,166 @@ Normalizes the norm or value range of an array.
     :param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 .. seealso:: :ocv:func:`normalize`
+
+
+
+gpu::mulSpectrums
+---------------------
+Performs a per-element multiplication of two Fourier spectrums.
+
+.. ocv:function:: void gpu::mulSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream=Stream::Null() )
+
+    :param a: First spectrum.
+
+    :param b: Second spectrum with the same size and type as  ``a`` .
+
+    :param c: Destination spectrum.
+
+    :param flags: Mock parameter used for CPU/GPU interfaces similarity.
+
+    :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
+
+    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
+
+.. seealso:: :ocv:func:`mulSpectrums`
+
+
+
+gpu::mulAndScaleSpectrums
+-----------------------------
+Performs a per-element multiplication of two Fourier spectrums and scales the result.
+
+.. ocv:function:: void gpu::mulAndScaleSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream=Stream::Null() )
+
+    :param a: First spectrum.
+
+    :param b: Second spectrum with the same size and type as  ``a`` .
+
+    :param c: Destination spectrum.
+
+    :param flags: Mock parameter used for CPU/GPU interfaces similarity.
+
+    :param scale: Scale constant.
+
+    :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
+
+    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
+
+.. seealso:: :ocv:func:`mulSpectrums`
+
+
+
+gpu::dft
+------------
+Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix.
+
+.. ocv:function:: void gpu::dft( const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream=Stream::Null() )
+
+    :param src: Source matrix (real or complex).
+
+    :param dst: Destination matrix (real or complex).
+
+    :param dft_size: Size of a discrete Fourier transform.
+
+    :param flags: Optional flags:
+
+        * **DFT_ROWS** transforms each individual row of the source matrix.
+
+        * **DFT_SCALE** scales the result: divide it by the number of elements in the transform (obtained from  ``dft_size`` ).
+
+        * **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively).
+
+        * **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of real-complex transform, so the destination matrix must be real.
+
+Use this function to handle real matrices ( ``CV_32FC1`` ) and complex matrices in the interleaved format ( ``CV_32FC2`` ).
+
+The source matrix should be continuous, otherwise reallocation and data copying are performed. The function chooses an operation mode depending on the flags, size, and channel count of the source matrix:
+
+    * If the source matrix is complex and the output is not specified as real, the destination matrix is complex and has the ``dft_size``    size and ``CV_32FC2``    type. The destination matrix contains a full result of the DFT (forward or inverse).
+
+    * If the source matrix is complex and the output is specified as real, the function assumes that its input is the result of the forward transform (see the next item). The destination matrix has the ``dft_size`` size and ``CV_32FC1`` type. It contains the result of the inverse DFT.
+
+    * If the source matrix is real (its type is ``CV_32FC1`` ), forward DFT is performed. The result of the DFT is packed into complex ( ``CV_32FC2`` ) matrix. So, the width of the destination matrix is ``dft_size.width / 2 + 1`` . But if the source is a single column, the height is reduced instead of the width.
+
+.. seealso:: :ocv:func:`dft`
+
+
+gpu::ConvolveBuf
+----------------
+.. ocv:struct:: gpu::ConvolveBuf
+
+Class providing a memory buffer for the :ocv:func:`gpu::convolve` function. It also allows adjusting some specific parameters. ::
+
+    struct CV_EXPORTS ConvolveBuf
+    {
+        Size result_size;
+        Size block_size;
+        Size user_block_size;
+        Size dft_size;
+        int spect_len;
+
+        GpuMat image_spect, templ_spect, result_spect;
+        GpuMat image_block, templ_block, result_data;
+
+        void create(Size image_size, Size templ_size);
+        static Size estimateBlockSize(Size result_size, Size templ_size);
+    };
+
+You can use the ``user_block_size`` field to set a specific block size for the :ocv:func:`gpu::convolve` function. If you leave its default value ``Size(0,0)`` , automatic estimation of the block size is used (which is optimized for speed). By varying ``user_block_size`` you can reduce memory requirements at the cost of speed.
+
+gpu::ConvolveBuf::create
+------------------------
+.. ocv:function:: gpu::ConvolveBuf::create(Size image_size, Size templ_size)
+
+Constructs a buffer for the :ocv:func:`gpu::convolve` function with the respective arguments.
+
+
+gpu::convolve
+-----------------
+Computes a convolution (or cross-correlation) of two images.
+
+.. ocv:function:: void gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr=false)
+
+.. ocv:function:: void gpu::convolve( const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream=Stream::Null() )
+
+    :param image: Source image. Only  ``CV_32FC1`` images are supported for now.
+
+    :param templ: Template image. The size is not greater than the  ``image`` size. The type is the same as  ``image`` .
+
+    :param result: Result image. If  ``image`` is  *W x H*  and ``templ`` is  *w x h*, then  ``result`` must be *W-w+1 x H-h+1*.
+
+    :param ccorr: Flag to evaluate cross-correlation instead of convolution.
+
+    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::ConvolveBuf`.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`gpu::filter2D`
+
+
+
+gpu::copyMakeBorder
+-----------------------
+Forms a border around an image.
+
+.. ocv:function:: void gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null())
+
+    :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and  ``CV_32FC1`` types are supported.
+
+    :param dst: Destination image with the same type as  ``src``. The size is  ``Size(src.cols+left+right, src.rows+top+bottom)`` .
+
+    :param top:
+
+    :param bottom:
+
+    :param left:
+
+    :param right: Number of pixels in each direction from the source image rectangle to extrapolate. For example:  ``top=1, bottom=1, left=1, right=1`` means that a 1-pixel-wide border needs to be built.
+
+    :param borderType: Border type. See  :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
+
+    :param value: Border value.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`copyMakeBorder`
diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp
index 8957d06d5..bb1a89d03 100644
--- a/modules/gpuarithm/perf/perf_core.cpp
+++ b/modules/gpuarithm/perf/perf_core.cpp
@@ -2411,7 +2411,7 @@ CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv
 
 DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp);
 
-PERF_TEST_P(Sz_Depth_Op, Threshold,
+PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold,
             Combine(GPU_TYPICAL_MAT_SIZES,
             Values(CV_8U, CV_16U, CV_32F, CV_64F),
             ALL_THRESH_OPS))
diff --git a/modules/gpufeatures2d/CMakeLists.txt b/modules/gpufeatures2d/CMakeLists.txt
index 4a93be34a..7162fae15 100644
--- a/modules/gpufeatures2d/CMakeLists.txt
+++ b/modules/gpufeatures2d/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Feature Detection and Description")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
-ocv_define_module(gpufeatures2d opencv_features2d opencv_gpufilters opencv_gpuimgproc)
+ocv_define_module(gpufeatures2d opencv_features2d opencv_gpufilters opencv_gpuwarping)
diff --git a/modules/gpufeatures2d/src/precomp.hpp b/modules/gpufeatures2d/src/precomp.hpp
index d3936264b..9fbecc71e 100644
--- a/modules/gpufeatures2d/src/precomp.hpp
+++ b/modules/gpufeatures2d/src/precomp.hpp
@@ -49,8 +49,7 @@
 
 #include "opencv2/gpufeatures2d.hpp"
 #include "opencv2/gpuarithm.hpp"
-#include "opencv2/gpuimgproc.hpp"
-
+#include "opencv2/gpuwarping.hpp"
 #include "opencv2/features2d.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
diff --git a/modules/gpuimgproc/doc/image_processing.rst b/modules/gpuimgproc/doc/image_processing.rst
index 69e500374..52d7b92ad 100644
--- a/modules/gpuimgproc/doc/image_processing.rst
+++ b/modules/gpuimgproc/doc/image_processing.rst
@@ -97,18 +97,6 @@ Computes a squared integral image.
 
 
 
-gpu::columnSum
-------------------
-Computes a vertical (column) sum.
-
-.. ocv:function:: void gpu::columnSum(const GpuMat& src, GpuMat& sum)
-
-    :param src: Source image. Only  ``CV_32FC1`` images are supported for now.
-
-    :param sum: Destination image of the  ``CV_32FC1`` type.
-
-
-
 gpu::cornerHarris
 ---------------------
 Computes the Harris cornerness criteria at each image pixel.
@@ -155,139 +143,6 @@ Computes the minimum eigen value of a 2x2 derivative covariation matrix at each
 
 
 
-gpu::mulSpectrums
----------------------
-Performs a per-element multiplication of two Fourier spectrums.
-
-.. ocv:function:: void gpu::mulSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream=Stream::Null() )
-
-    :param a: First spectrum.
-
-    :param b: Second spectrum with the same size and type as  ``a`` .
-
-    :param c: Destination spectrum.
-
-    :param flags: Mock parameter used for CPU/GPU interfaces similarity.
-
-    :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
-
-    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
-
-.. seealso:: :ocv:func:`mulSpectrums`
-
-
-
-gpu::mulAndScaleSpectrums
------------------------------
-Performs a per-element multiplication of two Fourier spectrums and scales the result.
-
-.. ocv:function:: void gpu::mulAndScaleSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream=Stream::Null() )
-
-    :param a: First spectrum.
-
-    :param b: Second spectrum with the same size and type as  ``a`` .
-
-    :param c: Destination spectrum.
-
-    :param flags: Mock parameter used for CPU/GPU interfaces similarity.
-
-    :param scale: Scale constant.
-
-    :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
-
-    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
-
-.. seealso:: :ocv:func:`mulSpectrums`
-
-
-
-gpu::dft
-------------
-Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix.
-
-.. ocv:function:: void gpu::dft( const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream=Stream::Null() )
-
-    :param src: Source matrix (real or complex).
-
-    :param dst: Destination matrix (real or complex).
-
-    :param dft_size: Size of a discrete Fourier transform.
-
-    :param flags: Optional flags:
-
-        * **DFT_ROWS** transforms each individual row of the source matrix.
-
-        * **DFT_SCALE** scales the result: divide it by the number of elements in the transform (obtained from  ``dft_size`` ).
-
-        * **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively).
-
-        * **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of real-complex transform, so the destination matrix must be real.
-
-Use to handle real matrices ( ``CV32FC1`` ) and complex matrices in the interleaved format ( ``CV32FC2`` ).
-
-The source matrix should be continuous, otherwise reallocation and data copying is performed. The function chooses an operation mode depending on the flags, size, and channel count of the source matrix:
-
-    * If the source matrix is complex and the output is not specified as real, the destination matrix is complex and has the ``dft_size``    size and ``CV_32FC2``    type. The destination matrix contains a full result of the DFT (forward or inverse).
-
-    * If the source matrix is complex and the output is specified as real, the function assumes that its input is the result of the forward transform (see the next item). The destination matrix has the ``dft_size`` size and ``CV_32FC1`` type. It contains the result of the inverse DFT.
-
-    * If the source matrix is real (its type is ``CV_32FC1`` ), forward DFT is performed. The result of the DFT is packed into complex ( ``CV_32FC2`` ) matrix. So, the width of the destination matrix is ``dft_size.width / 2 + 1`` . But if the source is a single column, the height is reduced instead of the width.
-
-.. seealso:: :ocv:func:`dft`
-
-
-gpu::ConvolveBuf
-----------------
-.. ocv:struct:: gpu::ConvolveBuf
-
-Class providing a memory buffer for :ocv:func:`gpu::convolve` function, plus it allows to adjust some specific parameters. ::
-
-    struct CV_EXPORTS ConvolveBuf
-    {
-        Size result_size;
-        Size block_size;
-        Size user_block_size;
-        Size dft_size;
-        int spect_len;
-
-        GpuMat image_spect, templ_spect, result_spect;
-        GpuMat image_block, templ_block, result_data;
-
-        void create(Size image_size, Size templ_size);
-        static Size estimateBlockSize(Size result_size, Size templ_size);
-    };
-
-You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::convolve` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
-
-gpu::ConvolveBuf::create
-------------------------
-.. ocv:function:: gpu::ConvolveBuf::create(Size image_size, Size templ_size)
-
-Constructs a buffer for :ocv:func:`gpu::convolve` function with respective arguments.
-
-
-gpu::convolve
------------------
-Computes a convolution (or cross-correlation) of two images.
-
-.. ocv:function:: void gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr=false)
-
-.. ocv:function:: void gpu::convolve( const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream=Stream::Null() )
-
-    :param image: Source image. Only  ``CV_32FC1`` images are supported for now.
-
-    :param templ: Template image. The size is not greater than the  ``image`` size. The type is the same as  ``image`` .
-
-    :param result: Result image. If  ``image`` is  *W x H*  and ``templ`` is  *w x h*, then  ``result`` must be *W-w+1 x H-h+1*.
-
-    :param ccorr: Flags to evaluate cross-correlation instead of convolution.
-
-    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::ConvolveBuf`.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`gpu::filter2D`
-
 gpu::MatchTemplateBuf
 ---------------------
 .. ocv:struct:: gpu::MatchTemplateBuf
@@ -305,6 +160,8 @@ Class providing memory buffers for :ocv:func:`gpu::matchTemplate` function, plus
 
 You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::matchTemplate` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
 
+
+
 gpu::matchTemplate
 ----------------------
 Computes a proximity map for a raster template and an image where the template is searched for.
@@ -342,39 +199,6 @@ Computes a proximity map for a raster template and an image where the template i
 .. seealso:: :ocv:func:`matchTemplate`
 
 
-gpu::remap
---------------
-Applies a generic geometrical transformation to an image.
-
-.. ocv:function:: void gpu::remap( const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
-
-    :param src: Source image.
-
-    :param dst: Destination image with the size the same as  ``xmap`` and the type the same as  ``src`` .
-
-    :param xmap: X values. Only  ``CV_32FC1`` type is supported.
-
-    :param ymap: Y values. Only  ``CV_32FC1`` type is supported.
-
-    :param interpolation: Interpolation method (see  :ocv:func:`resize` ). ``INTER_NEAREST`` , ``INTER_LINEAR`` and ``INTER_CUBIC`` are supported for now.
-
-    :param borderMode: Pixel extrapolation method (see  :ocv:func:`borderInterpolate` ). ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
-
-    :param borderValue: Value used in case of a constant border. By default, it is 0.
-
-    :param stream: Stream for the asynchronous version.
-
-The function transforms the source image using the specified map:
-
-.. math::
-
-    \texttt{dst} (x,y) =  \texttt{src} (xmap(x,y), ymap(x,y))
-
-Values of pixels with non-integer coordinates are computed using the bilinear interpolation.
-
-.. seealso:: :ocv:func:`remap`
-
-
 
 gpu::cvtColor
 -----------------
@@ -414,185 +238,6 @@ The methods support arbitrary permutations of the original channels, including r
 
 
 
-gpu::resize
----------------
-Resizes an image.
-
-.. ocv:function:: void gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
-
-    :param src: Source image.
-
-    :param dst: Destination image  with the same type as  ``src`` . The size is ``dsize`` (when it is non-zero) or the size is computed from  ``src.size()`` , ``fx`` , and  ``fy`` .
-
-    :param dsize: Destination image size. If it is zero, it is computed as:
-
-        .. math::
-            \texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}
-
-        Either  ``dsize`` or both  ``fx`` and  ``fy`` must be non-zero.
-
-    :param fx: Scale factor along the horizontal axis. If it is zero, it is computed as:
-
-        .. math::
-
-            \texttt{(double)dsize.width/src.cols}
-
-    :param fy: Scale factor along the vertical axis. If it is zero, it is computed as:
-
-        .. math::
-
-            \texttt{(double)dsize.height/src.rows}
-
-    :param interpolation: Interpolation method. ``INTER_NEAREST`` , ``INTER_LINEAR`` and ``INTER_CUBIC`` are supported for now.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`resize`
-
-
-
-gpu::warpAffine
--------------------
-Applies an affine transformation to an image.
-
-.. ocv:function:: void gpu::warpAffine( const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
-
-    :param src: Source image.  ``CV_8U`` , ``CV_16U`` , ``CV_32S`` , or  ``CV_32F`` depth and 1, 3, or 4 channels are supported.
-
-    :param dst: Destination image with the same type as  ``src`` . The size is  ``dsize`` .
-
-    :param M: *2x3*  transformation matrix.
-
-    :param dsize: Size of the destination image.
-
-    :param flags: Combination of interpolation methods (see  :ocv:func:`resize`) and the optional flag  ``WARP_INVERSE_MAP`` specifying that  ``M`` is an inverse transformation ( ``dst=>src`` ). Only ``INTER_NEAREST`` , ``INTER_LINEAR`` , and  ``INTER_CUBIC`` interpolation methods are supported.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`warpAffine`
-
-
-
-gpu::buildWarpAffineMaps
-------------------------
-Builds transformation maps for affine transformation.
-
-.. ocv:function:: void gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null())
-
-    :param M: *2x3*  transformation matrix.
-
-    :param inverse: Flag  specifying that  ``M`` is an inverse transformation ( ``dst=>src`` ).
-
-    :param dsize: Size of the destination image.
-
-    :param xmap: X values with  ``CV_32FC1`` type.
-
-    :param ymap: Y values with  ``CV_32FC1`` type.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`gpu::warpAffine` , :ocv:func:`gpu::remap`
-
-
-
-gpu::warpPerspective
-------------------------
-Applies a perspective transformation to an image.
-
-.. ocv:function:: void gpu::warpPerspective( const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
-
-    :param src: Source image. ``CV_8U`` , ``CV_16U`` , ``CV_32S`` , or  ``CV_32F`` depth and 1, 3, or 4 channels are supported.
-
-    :param dst: Destination image with the same type as  ``src`` . The size is  ``dsize`` .
-
-    :param M: *3x3* transformation matrix.
-
-    :param dsize: Size of the destination image.
-
-    :param flags: Combination of interpolation methods (see  :ocv:func:`resize` ) and the optional flag  ``WARP_INVERSE_MAP`` specifying that  ``M`` is the inverse transformation ( ``dst => src`` ). Only  ``INTER_NEAREST`` , ``INTER_LINEAR`` , and  ``INTER_CUBIC`` interpolation methods are supported.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`warpPerspective`
-
-
-
-gpu::buildWarpPerspectiveMaps
------------------------------
-Builds transformation maps for perspective transformation.
-
-.. ocv:function:: void gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null())
-
-    :param M: *3x3*  transformation matrix.
-
-    :param inverse: Flag  specifying that  ``M`` is an inverse transformation ( ``dst=>src`` ).
-
-    :param dsize: Size of the destination image.
-
-    :param xmap: X values with  ``CV_32FC1`` type.
-
-    :param ymap: Y values with  ``CV_32FC1`` type.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`gpu::warpPerspective` , :ocv:func:`gpu::remap`
-
-
-
-gpu::rotate
----------------
-Rotates an image around the origin (0,0) and then shifts it.
-
-.. ocv:function:: void gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
-
-    :param src: Source image. Supports 1, 3 or 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32F`` depth.
-
-    :param dst: Destination image with the same type as  ``src`` . The size is  ``dsize`` .
-
-    :param dsize: Size of the destination image.
-
-    :param angle: Angle of rotation in degrees.
-
-    :param xShift: Shift along the horizontal axis.
-
-    :param yShift: Shift along the vertical axis.
-
-    :param interpolation: Interpolation method. Only  ``INTER_NEAREST`` , ``INTER_LINEAR`` , and  ``INTER_CUBIC`` are supported.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`gpu::warpAffine`
-
-
-
-gpu::copyMakeBorder
------------------------
-Forms a border around an image.
-
-.. ocv:function:: void gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null())
-
-    :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and  ``CV_32FC1`` types are supported.
-
-    :param dst: Destination image with the same type as  ``src``. The size is  ``Size(src.cols+left+right, src.rows+top+bottom)`` .
-
-    :param top:
-
-    :param bottom:
-
-    :param left:
-
-    :param right: Number of pixels in each direction from the source image rectangle to extrapolate. For example:  ``top=1, bottom=1, left=1, right=1`` mean that 1 pixel-wide border needs to be built.
-
-    :param borderType: Border type. See  :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
-
-    :param value: Border value.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`copyMakeBorder`
-
-
-
 gpu::rectStdDev
 -------------------
 Computes a standard deviation of integral images.
@@ -711,68 +356,6 @@ Equalizes the histogram of a grayscale image.
 
 
 
-gpu::buildWarpPlaneMaps
------------------------
-Builds plane warping maps.
-
-.. ocv:function:: void gpu::buildWarpPlaneMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, const Mat & T, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::buildWarpCylindricalMaps
------------------------------
-Builds cylindrical warping maps.
-
-.. ocv:function:: void gpu::buildWarpCylindricalMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::buildWarpSphericalMaps
----------------------------
-Builds spherical warping maps.
-
-.. ocv:function:: void gpu::buildWarpSphericalMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::pyrDown
--------------------
-Smoothes an image and downsamples it.
-
-.. ocv:function:: void gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Source image.
-
-    :param dst: Destination image. Will have ``Size((src.cols+1)/2, (src.rows+1)/2)`` size and the same type as ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`pyrDown`
-
-
-
-gpu::pyrUp
--------------------
-Upsamples an image and then smoothes it.
-
-.. ocv:function:: void gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Source image.
-
-    :param dst: Destination image. Will have ``Size(src.cols*2, src.rows*2)`` size and the same type as ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`pyrUp`
-
-
-
 gpu::blendLinear
 -------------------
 Performs linear blending of two images.
@@ -841,6 +424,8 @@ Performs pure non local means denoising without any simplification, and thus it
 
     :ocv:func:`fastNlMeansDenoising`
 
+
+
 gpu::FastNonLocalMeansDenoising
 -------------------------------
 .. ocv:class:: gpu::FastNonLocalMeansDenoising
@@ -858,6 +443,8 @@ gpu::FastNonLocalMeansDenoising
 
 The class implements fast approximate Non Local Means Denoising algorithm.
 
+
+
 gpu::FastNonLocalMeansDenoising::simpleMethod()
 -----------------------------------------------
 Perform image denoising using Non-local Means Denoising algorithm http://www.ipol.im/pub/algo/bcm_non_local_means_denoising with several computational optimizations. Noise expected to be a gaussian white noise
@@ -882,6 +469,8 @@ This function expected to be applied to grayscale images. For colored images loo
 
     :ocv:func:`fastNlMeansDenoising`
 
+
+
 gpu::FastNonLocalMeansDenoising::labMethod()
 --------------------------------------------
 Modification of ``FastNonLocalMeansDenoising::simpleMethod`` for color images
@@ -908,6 +497,8 @@ The function converts image to CIELAB colorspace and then separately denoise L a
 
     :ocv:func:`fastNlMeansDenoisingColored`
 
+
+
 gpu::alphaComp
 -------------------
 Composites two images using alpha opacity values contained in each image.
diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
index a0b1e3094..00ef0a3e9 100644
--- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
@@ -60,12 +60,6 @@ enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA
 //! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
 CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
 
-//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
-//! supports only CV_32FC1 map type
-CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
-                      int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(),
-                      Stream& stream = Stream::Null());
-
 //! Does mean shift filtering on GPU.
 CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
                                    TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
@@ -113,42 +107,6 @@ CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& strea
 //! Routines for correcting image color gamma
 CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null());
 
-//! resizes the image
-//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA
-CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
-
-//! warps the image using affine transformation
-//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
-    int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
-
-CV_EXPORTS void buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
-
-//! warps the image using perspective transformation
-//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
-    int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
-
-CV_EXPORTS void buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
-
-//! builds plane warping maps
-CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, float scale,
-                                   GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
-
-//! builds cylindrical warping maps
-CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                         GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
-
-//! builds spherical warping maps
-CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                       GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
-
-//! rotates an image around the origin (0,0) and then shifts it
-//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth
-CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
-                       int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
-
 //! computes Harris cornerness criteria at each image pixel
 CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
 CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
@@ -176,12 +134,6 @@ CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat&
 //! computes the proximity map for the raster template and the image where the template is searched for
 CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null());
 
-//! smoothes the source image and downsamples it
-CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-
-//! upsamples the source image and then smoothes it
-CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-
 //! performs linear blending of two images
 //! to avoid accuracy errors sum of weigths shouldn't be very close to zero
 CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
@@ -227,32 +179,6 @@ CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double
 CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
 CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
 
-class CV_EXPORTS ImagePyramid
-{
-public:
-    inline ImagePyramid() : nLayers_(0) {}
-    inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null())
-    {
-        build(img, nLayers, stream);
-    }
-
-    void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null());
-
-    void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const;
-
-    inline void release()
-    {
-        layer0_.release();
-        pyramid_.clear();
-        nLayers_ = 0;
-    }
-
-private:
-    GpuMat layer0_;
-    std::vector<GpuMat> pyramid_;
-    int nLayers_;
-};
-
 //! HoughLines
 
 struct HoughLinesBuf
diff --git a/modules/gpuimgproc/perf/perf_imgproc.cpp b/modules/gpuimgproc/perf/perf_imgproc.cpp
index 349dcc825..ff19e14ed 100644
--- a/modules/gpuimgproc/perf/perf_imgproc.cpp
+++ b/modules/gpuimgproc/perf/perf_imgproc.cpp
@@ -46,323 +46,6 @@ using namespace std;
 using namespace testing;
 using namespace perf;
 
-//////////////////////////////////////////////////////////////////////
-// Remap
-
-enum { HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH };
-CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH);
-
-void generateMap(cv::Mat& map_x, cv::Mat& map_y, int remapMode)
-{
-    for (int j = 0; j < map_x.rows; ++j)
-    {
-        for (int i = 0; i < map_x.cols; ++i)
-        {
-            switch (remapMode)
-            {
-            case HALF_SIZE:
-                if (i > map_x.cols*0.25 && i < map_x.cols*0.75 && j > map_x.rows*0.25 && j < map_x.rows*0.75)
-                {
-                    map_x.at<float>(j,i) = 2.f * (i - map_x.cols * 0.25f) + 0.5f;
-                    map_y.at<float>(j,i) = 2.f * (j - map_x.rows * 0.25f) + 0.5f;
-                }
-                else
-                {
-                    map_x.at<float>(j,i) = 0.f;
-                    map_y.at<float>(j,i) = 0.f;
-                }
-                break;
-            case UPSIDE_DOWN:
-                map_x.at<float>(j,i) = static_cast<float>(i);
-                map_y.at<float>(j,i) = static_cast<float>(map_x.rows - j);
-                break;
-            case REFLECTION_X:
-                map_x.at<float>(j,i) = static_cast<float>(map_x.cols - i);
-                map_y.at<float>(j,i) = static_cast<float>(j);
-                break;
-            case REFLECTION_BOTH:
-                map_x.at<float>(j,i) = static_cast<float>(map_x.cols - i);
-                map_y.at<float>(j,i) = static_cast<float>(map_x.rows - j);
-                break;
-            } // end of switch
-        }
-    }
-}
-
-DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border_Mode, cv::Size, MatDepth, MatCn, Interpolation, BorderMode, RemapMode);
-
-PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4,
-                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-                    ALL_BORDER_MODES,
-                    RemapMode::all()))
-{
-    declare.time(20.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int interpolation = GET_PARAM(3);
-    const int borderMode = GET_PARAM(4);
-    const int remapMode = GET_PARAM(5);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat xmap(size, CV_32FC1);
-    cv::Mat ymap(size, CV_32FC1);
-    generateMap(xmap, ymap, remapMode);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        const cv::gpu::GpuMat d_xmap(xmap);
-        const cv::gpu::GpuMat d_ymap(ymap);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::remap(d_src, dst, d_xmap, d_ymap, interpolation, borderMode);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Resize
-
-DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Scale, cv::Size, MatDepth, MatCn, Interpolation, double);
-
-PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4,
-                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-                    Values(0.5, 0.3, 2.0)))
-{
-    declare.time(20.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int interpolation = GET_PARAM(3);
-    const double f = GET_PARAM(4);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation);
-
-        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::resize(src, dst, cv::Size(), f, f, interpolation);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// ResizeArea
-
-DEF_PARAM_TEST(Sz_Depth_Cn_Scale, cv::Size, MatDepth, MatCn, double);
-
-PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4,
-                    Values(0.2, 0.1, 0.05)))
-{
-    declare.time(1.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int interpolation = cv::INTER_AREA;
-    const double f = GET_PARAM(3);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::resize(src, dst, cv::Size(), f, f, interpolation);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// WarpAffine
-
-DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border, cv::Size, MatDepth, MatCn, Interpolation, BorderMode);
-
-PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4,
-                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-                    ALL_BORDER_MODES))
-{
-    declare.time(20.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int interpolation = GET_PARAM(3);
-    const int borderMode = GET_PARAM(4);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    const double aplha = CV_PI / 4;
-    const double mat[2 * 3] =
-    {
-        std::cos(aplha), -std::sin(aplha), src.cols / 2,
-        std::sin(aplha),  std::cos(aplha), 0
-    };
-    const cv::Mat M(2, 3, CV_64F, (void*) mat);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::warpAffine(d_src, dst, M, size, interpolation, borderMode);
-
-        GPU_SANITY_CHECK(dst, 1);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::warpAffine(src, dst, M, size, interpolation, borderMode);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// WarpPerspective
-
-PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4,
-                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-                    ALL_BORDER_MODES))
-{
-    declare.time(20.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int interpolation = GET_PARAM(3);
-    const int borderMode = GET_PARAM(4);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    const double aplha = CV_PI / 4;
-    double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
-                         {std::sin(aplha),  std::cos(aplha), 0},
-                         {0.0,              0.0,             1.0}};
-    const cv::Mat M(3, 3, CV_64F, (void*) mat);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::warpPerspective(d_src, dst, M, size, interpolation, borderMode);
-
-        GPU_SANITY_CHECK(dst, 1);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Threshold
-
-CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
-
-DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp);
-
-PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-            Values(CV_8U, CV_16U, CV_32F, CV_64F),
-            ThreshOp::all()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int threshOp = GET_PARAM(2);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::threshold(d_src, dst, 100.0, 255.0, threshOp);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::threshold(src, dst, 100.0, 255.0, threshOp);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // HistEvenC1
 
@@ -892,196 +575,6 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerMinEigenVal,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// BuildWarpPlaneMaps
-
-PERF_TEST_P(Sz, ImgProc_BuildWarpPlaneMaps,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
-    const cv::Mat T = cv::Mat::zeros(1, 3, CV_32F);
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat map_x;
-        cv::gpu::GpuMat map_y;
-
-        TEST_CYCLE() cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y);
-
-        GPU_SANITY_CHECK(map_x);
-        GPU_SANITY_CHECK(map_y);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BuildWarpCylindricalMaps
-
-PERF_TEST_P(Sz, ImgProc_BuildWarpCylindricalMaps,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat map_x;
-        cv::gpu::GpuMat map_y;
-
-        TEST_CYCLE() cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
-
-        GPU_SANITY_CHECK(map_x);
-        GPU_SANITY_CHECK(map_y);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BuildWarpSphericalMaps
-
-PERF_TEST_P(Sz, ImgProc_BuildWarpSphericalMaps,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat map_x;
-        cv::gpu::GpuMat map_y;
-
-        TEST_CYCLE() cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
-
-        GPU_SANITY_CHECK(map_x);
-        GPU_SANITY_CHECK(map_y);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Rotate
-
-DEF_PARAM_TEST(Sz_Depth_Cn_Inter, cv::Size, MatDepth, MatCn, Interpolation);
-
-PERF_TEST_P(Sz_Depth_Cn_Inter, ImgProc_Rotate,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4,
-                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC))))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int interpolation = GET_PARAM(3);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::rotate(d_src, dst, size, 30.0, 0, 0, interpolation);
-
-        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// PyrDown
-
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::pyrDown(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::pyrDown(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// PyrUp
-
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::pyrUp(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::pyrUp(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // CvtColor
 
@@ -1284,82 +777,6 @@ PERF_TEST_P(Sz_Type_Op, AlphaComp,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// ImagePyramidBuild
-
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidBuild,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    const int nLayers = 5;
-    const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-
-        cv::gpu::ImagePyramid d_pyr;
-
-        TEST_CYCLE() d_pyr.build(d_src, nLayers);
-
-        cv::gpu::GpuMat dst;
-        d_pyr.getLayer(dst, dstSize);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// ImagePyramidGetLayer
-
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidGetLayer,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    const int nLayers = 3;
-    const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        cv::gpu::ImagePyramid d_pyr(d_src, nLayers);
-
-        TEST_CYCLE() d_pyr.getLayer(dst, dstSize);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // HoughLines
 
diff --git a/modules/gpuimgproc/src/cuda/imgproc.cu b/modules/gpuimgproc/src/cuda/imgproc.cu
index d2d0d0f3c..c47076f44 100644
--- a/modules/gpuimgproc/src/cuda/imgproc.cu
+++ b/modules/gpuimgproc/src/cuda/imgproc.cu
@@ -399,172 +399,6 @@ namespace cv { namespace gpu { namespace cudev
             if (stream == 0)
                 cudaSafeCall(cudaDeviceSynchronize());
         }
-
-        //////////////////////////////////////////////////////////////////////////
-        // buildWarpMaps
-
-        // TODO use intrinsics like __sinf and so on
-
-        namespace build_warp_maps
-        {
-
-            __constant__ float ck_rinv[9];
-            __constant__ float cr_kinv[9];
-            __constant__ float ct[3];
-            __constant__ float cscale;
-        }
-
-
-        class PlaneMapper
-        {
-        public:
-            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
-            {
-                using namespace build_warp_maps;
-
-                float x_ = u / cscale - ct[0];
-                float y_ = v / cscale - ct[1];
-
-                float z;
-                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);
-                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);
-                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);
-
-                x /= z;
-                y /= z;
-            }
-        };
-
-
-        class CylindricalMapper
-        {
-        public:
-            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
-            {
-                using namespace build_warp_maps;
-
-                u /= cscale;
-                float x_ = ::sinf(u);
-                float y_ = v / cscale;
-                float z_ = ::cosf(u);
-
-                float z;
-                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
-                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
-                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
-
-                if (z > 0) { x /= z; y /= z; }
-                else x = y = -1;
-            }
-        };
-
-
-        class SphericalMapper
-        {
-        public:
-            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
-            {
-                using namespace build_warp_maps;
-
-                v /= cscale;
-                u /= cscale;
-
-                float sinv = ::sinf(v);
-                float x_ = sinv * ::sinf(u);
-                float y_ = -::cosf(v);
-                float z_ = sinv * ::cosf(u);
-
-                float z;
-                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
-                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
-                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
-
-                if (z > 0) { x /= z; y /= z; }
-                else x = y = -1;
-            }
-        };
-
-
-        template <typename Mapper>
-        __global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,
-                                            PtrStepf map_x, PtrStepf map_y)
-        {
-            int du = blockIdx.x * blockDim.x + threadIdx.x;
-            int dv = blockIdx.y * blockDim.y + threadIdx.y;
-            if (du < cols && dv < rows)
-            {
-                float u = tl_u + du;
-                float v = tl_v + dv;
-                float x, y;
-                Mapper::mapBackward(u, v, x, y);
-                map_x.ptr(dv)[du] = x;
-                map_y.ptr(dv)[du] = y;
-            }
-        }
-
-
-        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                const float k_rinv[9], const float r_kinv[9], const float t[3],
-                                float scale, cudaStream_t stream)
-        {
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
-
-            int cols = map_x.cols;
-            int rows = map_x.rows;
-
-            dim3 threads(32, 8);
-            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
-
-            buildWarpMapsKernel<PlaneMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
-            cudaSafeCall(cudaGetLastError());
-            if (stream == 0)
-                cudaSafeCall(cudaDeviceSynchronize());
-        }
-
-
-        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                      const float k_rinv[9], const float r_kinv[9], float scale,
-                                      cudaStream_t stream)
-        {
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
-
-            int cols = map_x.cols;
-            int rows = map_x.rows;
-
-            dim3 threads(32, 8);
-            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
-
-            buildWarpMapsKernel<CylindricalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
-            cudaSafeCall(cudaGetLastError());
-            if (stream == 0)
-                cudaSafeCall(cudaDeviceSynchronize());
-        }
-
-
-        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                    const float k_rinv[9], const float r_kinv[9], float scale,
-                                    cudaStream_t stream)
-        {
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
-            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
-
-            int cols = map_x.cols;
-            int rows = map_x.rows;
-
-            dim3 threads(32, 8);
-            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
-
-            buildWarpMapsKernel<SphericalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
-            cudaSafeCall(cudaGetLastError());
-            if (stream == 0)
-                cudaSafeCall(cudaDeviceSynchronize());
-        }
     } // namespace imgproc
 }}} // namespace cv { namespace gpu { namespace cudev {
 
diff --git a/modules/gpuimgproc/src/imgproc.cpp b/modules/gpuimgproc/src/imgproc.cpp
index dabf054b6..939b14937 100644
--- a/modules/gpuimgproc/src/imgproc.cpp
+++ b/modules/gpuimgproc/src/imgproc.cpp
@@ -49,10 +49,6 @@ using namespace cv::gpu;
 
 void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
 void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
@@ -155,184 +151,6 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
     meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
 }
 
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpPlaneMaps
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
-                                cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T,
-                                 float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)
-{
-    (void)src_size;
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
-    CV_Assert((T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32F && T.isContinuous());
-
-    Mat K_Rinv = K * R.t();
-    Mat R_Kinv = R * K.inv();
-    CV_Assert(K_Rinv.isContinuous());
-    CV_Assert(R_Kinv.isContinuous());
-
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
-    cudev::imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
-                       T.ptr<float>(), scale, StreamAccessor::getStream(stream));
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpCylyndricalMaps
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                      const float k_rinv[9], const float r_kinv[9], float scale,
-                                      cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                       GpuMat& map_x, GpuMat& map_y, Stream& stream)
-{
-    (void)src_size;
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
-
-    Mat K_Rinv = K * R.t();
-    Mat R_Kinv = R * K.inv();
-    CV_Assert(K_Rinv.isContinuous());
-    CV_Assert(R_Kinv.isContinuous());
-
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
-    cudev::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
-}
-
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpSphericalMaps
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                    const float k_rinv[9], const float r_kinv[9], float scale,
-                                    cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
-                                     GpuMat& map_x, GpuMat& map_y, Stream& stream)
-{
-    (void)src_size;
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
-    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
-
-    Mat K_Rinv = K * R.t();
-    Mat R_Kinv = R * K.inv();
-    CV_Assert(K_Rinv.isContinuous());
-    CV_Assert(R_Kinv.isContinuous());
-
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
-    cudev::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// rotate
-
-namespace
-{
-    template<int DEPTH> struct NppTypeTraits;
-    template<> struct NppTypeTraits<CV_8U>  { typedef Npp8u npp_t; };
-    template<> struct NppTypeTraits<CV_8S>  { typedef Npp8s npp_t; };
-    template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
-    template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; };
-    template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; };
-    template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; };
-    template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; };
-
-    template <int DEPTH> struct NppRotateFunc
-    {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
-
-        typedef NppStatus (*func_t)(const npp_t* pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI,
-                                    npp_t* pDst, int nDstStep, NppiRect oDstROI,
-                                    double nAngle, double nShiftX, double nShiftY, int eInterpolation);
-    };
-
-    template <int DEPTH, typename NppRotateFunc<DEPTH>::func_t func> struct NppRotate
-    {
-        typedef typename NppRotateFunc<DEPTH>::npp_t npp_t;
-
-        static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
-        {
-            (void)dsize;
-            static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
-
-            NppStreamHandler h(stream);
-
-            NppiSize srcsz;
-            srcsz.height = src.rows;
-            srcsz.width = src.cols;
-            NppiRect srcroi;
-            srcroi.x = srcroi.y = 0;
-            srcroi.height = src.rows;
-            srcroi.width = src.cols;
-            NppiRect dstroi;
-            dstroi.x = dstroi.y = 0;
-            dstroi.height = dst.rows;
-            dstroi.width = dst.cols;
-
-            nppSafeCall( func(src.ptr<npp_t>(), srcsz, static_cast<int>(src.step), srcroi,
-                dst.ptr<npp_t>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-}
-
-void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
-{
-    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
-
-    static const func_t funcs[6][4] =
-    {
-        {NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
-        {0,0,0,0},
-        {NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
-        {0,0,0,0},
-        {0,0,0,0},
-        {NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
-    };
-
-    CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
-    CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
-
-    dst.create(dsize, src.type());
-    dst.setTo(Scalar::all(0));
-
-    funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
-}
-
 
 ////////////////////////////////////////////////////////////////////////
 // Histogram
@@ -344,14 +162,14 @@ namespace
 
     template<int SDEPTH> struct NppHistogramEvenFuncC1
     {
-        typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
 
     typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,
             int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);
     };
     template<int SDEPTH> struct NppHistogramEvenFuncC4
     {
-        typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
 
         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,
             Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);
@@ -420,7 +238,7 @@ namespace
 
     template<int SDEPTH> struct NppHistogramRangeFuncC1
     {
-        typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
         typedef Npp32s level_t;
         enum {LEVEL_TYPE_CODE=CV_32SC1};
 
@@ -438,7 +256,7 @@ namespace
     };
     template<int SDEPTH> struct NppHistogramRangeFuncC4
     {
-        typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
         typedef Npp32s level_t;
         enum {LEVEL_TYPE_CODE=CV_32SC1};
 
@@ -1042,14 +860,14 @@ namespace
 {
     template <int DEPTH> struct NppAlphaCompFunc
     {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_t;
 
         typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pSrc2, int nSrc2Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, NppiAlphaOp eAlphaOp);
     };
 
     template <int DEPTH, typename NppAlphaCompFunc<DEPTH>::func_t func> struct NppAlphaComp
     {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_t;
 
         static void call(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream)
         {
diff --git a/modules/gpuimgproc/src/precomp.hpp b/modules/gpuimgproc/src/precomp.hpp
index 7df02aadd..93bcf3a28 100644
--- a/modules/gpuimgproc/src/precomp.hpp
+++ b/modules/gpuimgproc/src/precomp.hpp
@@ -43,9 +43,9 @@
 #ifndef __OPENCV_PRECOMP_H__
 #define __OPENCV_PRECOMP_H__
 
+#include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpufilters.hpp"
 #include "opencv2/gpuarithm.hpp"
-#include "opencv2/gpuimgproc.hpp"
 
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/gpu_private.hpp"
diff --git a/modules/gpuimgproc/test/test_denoising.cpp b/modules/gpuimgproc/test/test_denoising.cpp
index 2f1a93be1..cb4ea1ecc 100644
--- a/modules/gpuimgproc/test/test_denoising.cpp
+++ b/modules/gpuimgproc/test/test_denoising.cpp
@@ -46,53 +46,6 @@
 
 using namespace cvtest;
 
-////////////////////////////////////////////////////////
-// BilateralFilter
-
-PARAM_TEST_CASE(BilateralFilter, cv::gpu::DeviceInfo, cv::Size, MatType)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    int kernel_size;
-    float sigma_color;
-    float sigma_spatial;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-
-        kernel_size = 5;
-        sigma_color = 10.f;
-        sigma_spatial = 3.5f;
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(BilateralFilter, Accuracy)
-{
-    cv::Mat src = randomMat(size, type);
-
-    src.convertTo(src, type);
-    cv::gpu::GpuMat dst;
-
-    cv::gpu::bilateralFilter(loadMat(src), dst, kernel_size, sigma_color, sigma_spatial);
-
-    cv::Mat dst_gold;
-    cv::bilateralFilter(src, dst_gold, kernel_size, sigma_color, sigma_spatial);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Denoising, BilateralFilter, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(639, 481)),
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_32FC1), MatType(CV_32FC3))
-    ));
-
 
 ////////////////////////////////////////////////////////
 // Brute Force Non local means
diff --git a/modules/gpuimgproc/test/test_imgproc.cpp b/modules/gpuimgproc/test/test_imgproc.cpp
index 6957f5437..0e66fe03d 100644
--- a/modules/gpuimgproc/test/test_imgproc.cpp
+++ b/modules/gpuimgproc/test/test_imgproc.cpp
@@ -840,4 +840,51 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerMinEigen, testing::Combine(
     testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
     testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
 
+////////////////////////////////////////////////////////
+// BilateralFilter
+
+PARAM_TEST_CASE(BilateralFilter, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    int kernel_size;
+    float sigma_color;
+    float sigma_spatial;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+
+        kernel_size = 5;
+        sigma_color = 10.f;
+        sigma_spatial = 3.5f;
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(BilateralFilter, Accuracy)
+{
+    cv::Mat src = randomMat(size, type);
+
+    src.convertTo(src, type);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::bilateralFilter(loadMat(src), dst, kernel_size, sigma_color, sigma_spatial);
+
+    cv::Mat dst_gold;
+    cv::bilateralFilter(src, dst_gold, kernel_size, sigma_color, sigma_spatial);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Denoising, BilateralFilter, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(639, 481)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_32FC1), MatType(CV_32FC3))
+    ));
+
 #endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_precomp.hpp b/modules/gpuimgproc/test/test_precomp.hpp
index a80f5e5f4..4196aa9fe 100644
--- a/modules/gpuimgproc/test/test_precomp.hpp
+++ b/modules/gpuimgproc/test/test_precomp.hpp
@@ -58,6 +58,4 @@
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/imgproc.hpp"
 
-#include "interpolation.hpp"
-
 #endif
diff --git a/modules/gpuobjdetect/CMakeLists.txt b/modules/gpuobjdetect/CMakeLists.txt
index 5bce4d283..bcc2242c5 100644
--- a/modules/gpuobjdetect/CMakeLists.txt
+++ b/modules/gpuobjdetect/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Object Detection")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpuobjdetect opencv_objdetect opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
+ocv_define_module(gpuobjdetect opencv_objdetect opencv_gpuwarping opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuobjdetect/src/precomp.hpp b/modules/gpuobjdetect/src/precomp.hpp
index 003df2eca..40fd6c46d 100644
--- a/modules/gpuobjdetect/src/precomp.hpp
+++ b/modules/gpuobjdetect/src/precomp.hpp
@@ -44,6 +44,7 @@
 #define __OPENCV_PRECOMP_H__
 
 #include "opencv2/gpuobjdetect.hpp"
+#include "opencv2/gpuwarping.hpp"
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpuarithm.hpp"
 
diff --git a/modules/gpuvideo/CMakeLists.txt b/modules/gpuvideo/CMakeLists.txt
index 4e6264aed..6c15bd147 100644
--- a/modules/gpuvideo/CMakeLists.txt
+++ b/modules/gpuvideo/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Video Analysis")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpuvideo opencv_video opencv_legacy opencv_gpufilters opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
+ocv_define_module(gpuvideo opencv_video opencv_legacy opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuvideo/src/precomp.hpp b/modules/gpuvideo/src/precomp.hpp
index 1e37cf77d..a1e1b47d5 100644
--- a/modules/gpuvideo/src/precomp.hpp
+++ b/modules/gpuvideo/src/precomp.hpp
@@ -49,6 +49,7 @@
 
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpufilters.hpp"
+#include "opencv2/gpuwarping.hpp"
 #include "opencv2/gpuimgproc.hpp"
 
 #include "opencv2/video.hpp"
diff --git a/modules/gpuwarping/CMakeLists.txt b/modules/gpuwarping/CMakeLists.txt
new file mode 100644
index 000000000..0c4ca7b12
--- /dev/null
+++ b/modules/gpuwarping/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpuwarping)
+endif()
+
+set(the_description "GPU-accelerated Image Warping")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)
+
+ocv_define_module(gpuwarping opencv_imgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuwarping/doc/gpuwarping.rst b/modules/gpuwarping/doc/gpuwarping.rst
new file mode 100644
index 000000000..4bdaa8d4e
--- /dev/null
+++ b/modules/gpuwarping/doc/gpuwarping.rst
@@ -0,0 +1,8 @@
+*****************************************
+gpuwarping. GPU-accelerated Image Warping
+*****************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    warping
diff --git a/modules/gpuwarping/doc/warping.rst b/modules/gpuwarping/doc/warping.rst
new file mode 100644
index 000000000..b2c95e2b0
--- /dev/null
+++ b/modules/gpuwarping/doc/warping.rst
@@ -0,0 +1,251 @@
+Image Warping
+=============
+
+.. highlight:: cpp
+
+
+
+gpu::remap
+--------------
+Applies a generic geometrical transformation to an image.
+
+.. ocv:function:: void gpu::remap( const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
+
+    :param src: Source image.
+
+    :param dst: Destination image with the size the same as  ``xmap`` and the type the same as  ``src`` .
+
+    :param xmap: X values. Only  ``CV_32FC1`` type is supported.
+
+    :param ymap: Y values. Only  ``CV_32FC1`` type is supported.
+
+    :param interpolation: Interpolation method (see  :ocv:func:`resize` ). ``INTER_NEAREST`` , ``INTER_LINEAR`` and ``INTER_CUBIC`` are supported for now.
+
+    :param borderMode: Pixel extrapolation method (see  :ocv:func:`borderInterpolate` ). ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
+
+    :param borderValue: Value used in case of a constant border. By default, it is 0.
+
+    :param stream: Stream for the asynchronous version.
+
+The function transforms the source image using the specified map:
+
+.. math::
+
+    \texttt{dst} (x,y) =  \texttt{src} (xmap(x,y), ymap(x,y))
+
+Values of pixels with non-integer coordinates are computed using the interpolation method specified by ``interpolation`` .
+
+.. seealso:: :ocv:func:`remap`
+
+
+
+gpu::resize
+---------------
+Resizes an image.
+
+.. ocv:function:: void gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
+
+    :param src: Source image.
+
+    :param dst: Destination image  with the same type as  ``src`` . The size is ``dsize`` (when it is non-zero) or the size is computed from  ``src.size()`` , ``fx`` , and  ``fy`` .
+
+    :param dsize: Destination image size. If it is zero, it is computed as:
+
+        .. math::
+            \texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}
+
+        Either  ``dsize`` or both  ``fx`` and  ``fy`` must be non-zero.
+
+    :param fx: Scale factor along the horizontal axis. If it is zero, it is computed as:
+
+        .. math::
+
+            \texttt{(double)dsize.width/src.cols}
+
+    :param fy: Scale factor along the vertical axis. If it is zero, it is computed as:
+
+        .. math::
+
+            \texttt{(double)dsize.height/src.rows}
+
+    :param interpolation: Interpolation method. ``INTER_NEAREST`` , ``INTER_LINEAR`` , ``INTER_CUBIC`` and ``INTER_AREA`` are supported for now.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`resize`
+
+
+
+gpu::warpAffine
+-------------------
+Applies an affine transformation to an image.
+
+.. ocv:function:: void gpu::warpAffine( const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
+
+    :param src: Source image.  ``CV_8U`` , ``CV_16U`` , ``CV_32S`` , or  ``CV_32F`` depth and 1, 3, or 4 channels are supported.
+
+    :param dst: Destination image with the same type as  ``src`` . The size is  ``dsize`` .
+
+    :param M: *2x3*  transformation matrix.
+
+    :param dsize: Size of the destination image.
+
+    :param flags: Combination of interpolation methods (see  :ocv:func:`resize`) and the optional flag  ``WARP_INVERSE_MAP`` specifying that  ``M`` is an inverse transformation ( ``dst=>src`` ). Only ``INTER_NEAREST`` , ``INTER_LINEAR`` , and  ``INTER_CUBIC`` interpolation methods are supported.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`warpAffine`
+
+
+
+gpu::buildWarpAffineMaps
+------------------------
+Builds transformation maps for affine transformation.
+
+.. ocv:function:: void gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null())
+
+    :param M: *2x3*  transformation matrix.
+
+    :param inverse: Flag  specifying that  ``M`` is an inverse transformation ( ``dst=>src`` ).
+
+    :param dsize: Size of the destination image.
+
+    :param xmap: X values with  ``CV_32FC1`` type.
+
+    :param ymap: Y values with  ``CV_32FC1`` type.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`gpu::warpAffine` , :ocv:func:`gpu::remap`
+
+
+
+gpu::warpPerspective
+------------------------
+Applies a perspective transformation to an image.
+
+.. ocv:function:: void gpu::warpPerspective( const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() )
+
+    :param src: Source image. ``CV_8U`` , ``CV_16U`` , ``CV_32S`` , or  ``CV_32F`` depth and 1, 3, or 4 channels are supported.
+
+    :param dst: Destination image with the same type as  ``src`` . The size is  ``dsize`` .
+
+    :param M: *3x3* transformation matrix.
+
+    :param dsize: Size of the destination image.
+
+    :param flags: Combination of interpolation methods (see  :ocv:func:`resize` ) and the optional flag  ``WARP_INVERSE_MAP`` specifying that  ``M`` is the inverse transformation ( ``dst => src`` ). Only  ``INTER_NEAREST`` , ``INTER_LINEAR`` , and  ``INTER_CUBIC`` interpolation methods are supported.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`warpPerspective`
+
+
+
+gpu::buildWarpPerspectiveMaps
+-----------------------------
+Builds transformation maps for perspective transformation.
+
+.. ocv:function:: void gpu::buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null())
+
+    :param M: *3x3*  transformation matrix.
+
+    :param inverse: Flag  specifying that  ``M`` is an inverse transformation ( ``dst=>src`` ).
+
+    :param dsize: Size of the destination image.
+
+    :param xmap: X values with  ``CV_32FC1`` type.
+
+    :param ymap: Y values with  ``CV_32FC1`` type.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`gpu::warpPerspective` , :ocv:func:`gpu::remap`
+
+
+
+gpu::rotate
+---------------
+Rotates an image around the origin (0,0) and then shifts it.
+
+.. ocv:function:: void gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
+
+    :param src: Source image. Supports 1, 3 or 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32F`` depth.
+
+    :param dst: Destination image with the same type as  ``src`` . The size is  ``dsize`` .
+
+    :param dsize: Size of the destination image.
+
+    :param angle: Angle of rotation in degrees.
+
+    :param xShift: Shift along the horizontal axis.
+
+    :param yShift: Shift along the vertical axis.
+
+    :param interpolation: Interpolation method. Only  ``INTER_NEAREST`` , ``INTER_LINEAR`` , and  ``INTER_CUBIC`` are supported.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`gpu::warpAffine`
+
+
+
+gpu::buildWarpPlaneMaps
+-----------------------
+Builds plane warping maps.
+
+.. ocv:function:: void gpu::buildWarpPlaneMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, const Mat & T, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::buildWarpCylindricalMaps
+-----------------------------
+Builds cylindrical warping maps.
+
+.. ocv:function:: void gpu::buildWarpCylindricalMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::buildWarpSphericalMaps
+---------------------------
+Builds spherical warping maps.
+
+.. ocv:function:: void gpu::buildWarpSphericalMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() )
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::pyrDown
+-------------------
+Smoothes an image and downsamples it.
+
+.. ocv:function:: void gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
+
+    :param src: Source image.
+
+    :param dst: Destination image. Will have ``Size((src.cols+1)/2, (src.rows+1)/2)`` size and the same type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`pyrDown`
+
+
+
+gpu::pyrUp
+-------------------
+Upsamples an image and then smoothes it.
+
+.. ocv:function:: void gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
+
+    :param src: Source image.
+
+    :param dst: Destination image. Will have ``Size(src.cols*2, src.rows*2)`` size and the same type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`pyrUp`
diff --git a/modules/gpuwarping/include/opencv2/gpuwarping.hpp b/modules/gpuwarping/include/opencv2/gpuwarping.hpp
new file mode 100644
index 000000000..ada180d9f
--- /dev/null
+++ b/modules/gpuwarping/include/opencv2/gpuwarping.hpp
@@ -0,0 +1,131 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUWARPING_HPP__
+#define __OPENCV_GPUWARPING_HPP__
+
+#ifndef __cplusplus
+#  error gpuwarping.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/gpumat.hpp"
+#include "opencv2/imgproc.hpp"
+
+namespace cv { namespace gpu {
+
+//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
+//! supports only CV_32FC1 map type
+CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
+                      int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(),
+                      Stream& stream = Stream::Null());
+
+//! resizes the image
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA
+CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
+
+//! warps the image using affine transformation
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
+    int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
+
+CV_EXPORTS void buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
+
+//! warps the image using perspective transformation
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
+    int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
+
+CV_EXPORTS void buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
+
+//! builds plane warping maps
+CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, float scale,
+                                   GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! builds cylindrical warping maps
+CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
+                                         GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! builds spherical warping maps
+CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
+                                       GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! rotates an image around the origin (0,0) and then shifts it
+//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth
+CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
+                       int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
+
+//! smoothes the source image and downsamples it
+CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! upsamples the source image and then smoothes it
+CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+class CV_EXPORTS ImagePyramid // GPU image pyramid; build() and getLayer() are implemented outside this header
+{
+public:
+    inline ImagePyramid() : nLayers_(0) {} // empty pyramid with a zero layer count
+    inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null())
+    {
+        build(img, nLayers, stream); // construction simply forwards all arguments to build()
+    }
+
+    void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null()); // declared only; body not in this header
+
+    void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const; // declared only; body not in this header
+
+    inline void release() // releases layer0_, empties pyramid_ and resets the layer count to 0
+    {
+        layer0_.release();
+        pyramid_.clear();
+        nLayers_ = 0;
+    }
+
+private:
+    GpuMat layer0_; // NOTE(review): presumably the base (input) image -- confirm against build()
+    std::vector<GpuMat> pyramid_; // NOTE(review): presumably the remaining levels -- confirm against build()
+    int nLayers_; // 0 after default construction and after release()
+};
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUWARPING_HPP__ */
diff --git a/modules/gpuwarping/perf/perf_main.cpp b/modules/gpuwarping/perf/perf_main.cpp
new file mode 100644
index 000000000..a7c1d5c85
--- /dev/null
+++ b/modules/gpuwarping/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+// CV_PERF_TEST_MAIN is defined outside this file; it receives the module name and a printCudaInfo() call.
+CV_PERF_TEST_MAIN(gpuwarping, printCudaInfo())
diff --git a/modules/gpuwarping/perf/perf_precomp.cpp b/modules/gpuwarping/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpuwarping/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpuwarping/perf/perf_precomp.hpp b/modules/gpuwarping/perf/perf_precomp.hpp
new file mode 100644
index 000000000..fc46b2647
--- /dev/null
+++ b/modules/gpuwarping/perf/perf_precomp.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpuwarping.hpp"
+#include "opencv2/imgproc.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpuwarping/perf/perf_warping.cpp b/modules/gpuwarping/perf/perf_warping.cpp
new file mode 100644
index 000000000..fd555cbe5
--- /dev/null
+++ b/modules/gpuwarping/perf/perf_warping.cpp
@@ -0,0 +1,592 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// Remap
+
+enum { HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH };
+CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH)
+
+void generateMap(cv::Mat& map_x, cv::Mat& map_y, int remapMode) // fills both float maps in place according to remapMode
+{
+    for (int j = 0; j < map_x.rows; ++j) // map_y is indexed with map_x's dimensions, so both maps must have the same size
+    {
+        for (int i = 0; i < map_x.cols; ++i)
+        {
+            switch (remapMode) // no default case: an unrecognized mode leaves both maps untouched
+            {
+            case HALF_SIZE: // inner region (strictly between 25% and 75%) gets 2x-scaled coordinates, everything else (0,0)
+                if (i > map_x.cols*0.25 && i < map_x.cols*0.75 && j > map_x.rows*0.25 && j < map_x.rows*0.75)
+                {
+                    map_x.at<float>(j,i) = 2.f * (i - map_x.cols * 0.25f) + 0.5f;
+                    map_y.at<float>(j,i) = 2.f * (j - map_x.rows * 0.25f) + 0.5f;
+                }
+                else
+                {
+                    map_x.at<float>(j,i) = 0.f;
+                    map_y.at<float>(j,i) = 0.f;
+                }
+                break;
+            case UPSIDE_DOWN: // x unchanged, y mirrored
+                map_x.at<float>(j,i) = static_cast<float>(i);
+                map_y.at<float>(j,i) = static_cast<float>(map_x.rows - j); // NOTE(review): j == 0 gives rows, one past the last row index
+                break;
+            case REFLECTION_X: // x mirrored, y unchanged
+                map_x.at<float>(j,i) = static_cast<float>(map_x.cols - i); // NOTE(review): i == 0 gives cols, one past the last column index
+                map_y.at<float>(j,i) = static_cast<float>(j);
+                break;
+            case REFLECTION_BOTH: // both axes mirrored (same one-past-the-end values at index 0 as above)
+                map_x.at<float>(j,i) = static_cast<float>(map_x.cols - i);
+                map_y.at<float>(j,i) = static_cast<float>(map_x.rows - j);
+                break;
+            } // end of switch
+        }
+    }
+}
+
+DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border_Mode, cv::Size, MatDepth, MatCn, Interpolation, BorderMode, RemapMode);
+// Times cv::gpu::remap (or cv::remap on the CPU path) over size x depth x channels x interpolation x border x map pattern.
+PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, Remap,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+                    ALL_BORDER_MODES,
+                    RemapMode::all()))
+{
+    declare.time(20.0);
+
+    const cv::Size size = GET_PARAM(0); // GET_PARAM indices follow the DEF_PARAM_TEST type order
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const int borderMode = GET_PARAM(4);
+    const int remapMode = GET_PARAM(5);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Mat xmap(size, CV_32FC1); // both maps have the source size and a single float channel
+    cv::Mat ymap(size, CV_32FC1);
+    generateMap(xmap, ymap, remapMode);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // GpuMat inputs are constructed from the host matrices before TEST_CYCLE()
+        const cv::gpu::GpuMat d_xmap(xmap);
+        const cv::gpu::GpuMat d_ymap(ymap);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::remap(d_src, dst, d_xmap, d_ymap, interpolation, borderMode);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode); // CPU path with the same arguments
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Resize
+
+DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Scale, cv::Size, MatDepth, MatCn, Interpolation, double);
+// Times cv::gpu::resize (or cv::resize on the CPU path) with nearest, linear and cubic interpolation at scales 0.5, 0.3 and 2.0.
+PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, Resize,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+                    Values(0.5, 0.3, 2.0)))
+{
+    declare.time(20.0);
+
+    const cv::Size size = GET_PARAM(0); // GET_PARAM indices follow the DEF_PARAM_TEST type order
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const double f = GET_PARAM(4); // one factor, used for both the x and y scale
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation); // empty dsize: output size comes from fx/fy
+
+        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE); // unlike the CPU branch, passes an explicit 1e-3 / ERROR_RELATIVE pair
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::resize(src, dst, cv::Size(), f, f, interpolation);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// ResizeArea
+
+DEF_PARAM_TEST(Sz_Depth_Cn_Scale, cv::Size, MatDepth, MatCn, double);
+// Times cv::gpu::resize (or cv::resize on the CPU path) with INTER_AREA only, at the downscale factors 0.2, 0.1 and 0.05.
+PERF_TEST_P(Sz_Depth_Cn_Scale, ResizeArea,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(0.2, 0.1, 0.05)))
+{
+    declare.time(1.0);
+
+    const cv::Size size = GET_PARAM(0); // GET_PARAM indices follow the DEF_PARAM_TEST type order
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = cv::INTER_AREA; // fixed here rather than taken from the test parameters
+    const double f = GET_PARAM(3); // one factor, used for both the x and y scale
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation); // empty dsize: output size comes from fx/fy
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::resize(src, dst, cv::Size(), f, f, interpolation);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// WarpAffine: times cv::gpu::warpAffine against cv::warpAffine for a fixed rotation by pi/4 plus an x shift.
+
+DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border, cv::Size, MatDepth, MatCn, Interpolation, BorderMode);
+
+PERF_TEST_P(Sz_Depth_Cn_Inter_Border, WarpAffine,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+                    ALL_BORDER_MODES))
+{
+    declare.time(20.0); // NOTE(review): presumably the time budget for this test - confirm the unit
+
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const int borderMode = GET_PARAM(4);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG); // NOTE(review): WARMUP_RNG presumably fills src with random data - confirm
+
+    // 2x3 transform: rotation by alpha, shifted along x by half the image width.
+    const double alpha = CV_PI / 4;
+    const double mat[2 * 3] =
+    {
+        // 2.0 keeps the shift in double: no integer division and no int -> double narrowing in the braces
+        std::cos(alpha), -std::sin(alpha), src.cols / 2.0,
+        std::sin(alpha),  std::cos(alpha), 0
+    };
+    const cv::Mat M(2, 3, CV_64F, (void*) mat); // header over the stack array, no copy
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::warpAffine(d_src, dst, M, size, interpolation, borderMode);
+
+        GPU_SANITY_CHECK(dst, 1);
+    }
+    else
+    {
+        cv::Mat dst;
+        TEST_CYCLE() cv::warpAffine(src, dst, M, size, interpolation, borderMode);
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// WarpPerspective: times cv::gpu::warpPerspective against cv::warpPerspective for a fixed 3x3 transform.
+
+PERF_TEST_P(Sz_Depth_Cn_Inter_Border, WarpPerspective,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+                    ALL_BORDER_MODES))
+{
+    declare.time(20.0); // NOTE(review): presumably the time budget for this test - confirm the unit
+
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const int borderMode = GET_PARAM(4);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG); // NOTE(review): WARMUP_RNG presumably fills src with random data - confirm
+
+    // Rotation by alpha with an x shift of half the width; 2.0 avoids int division and int -> double narrowing.
+    const double alpha = CV_PI / 4;
+    double mat[3][3] = { {std::cos(alpha), -std::sin(alpha), src.cols / 2.0},
+                         {std::sin(alpha),  std::cos(alpha), 0},
+                         {0.0,              0.0,             1.0}};
+    const cv::Mat M(3, 3, CV_64F, (void*) mat); // header over the stack array, no copy
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::warpPerspective(d_src, dst, M, size, interpolation, borderMode);
+
+        GPU_SANITY_CHECK(dst, 1);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpPlaneMaps: times cv::gpu::buildWarpPlaneMaps over a ROI covering the whole image (GPU only).
+
+PERF_TEST_P(Sz, BuildWarpPlaneMaps,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);  // identity K
+    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); // all-ones R; NOTE(review): not a rotation matrix - presumably only meant for timing
+    const cv::Mat T = cv::Mat::zeros(1, 3, CV_32F);  // zero T
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat map_x;
+        cv::gpu::GpuMat map_y;
+
+        TEST_CYCLE() cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y); // scale argument is 1.0
+
+        GPU_SANITY_CHECK(map_x);
+        GPU_SANITY_CHECK(map_y);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is timed in this test
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpCylindricalMaps: times cv::gpu::buildWarpCylindricalMaps over a ROI covering the whole image (GPU only).
+
+PERF_TEST_P(Sz, BuildWarpCylindricalMaps,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);  // identity K
+    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); // all-ones R; NOTE(review): not a rotation matrix - presumably only meant for timing
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat map_x;
+        cv::gpu::GpuMat map_y;
+
+        TEST_CYCLE() cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y); // scale argument is 1.0
+
+        GPU_SANITY_CHECK(map_x);
+        GPU_SANITY_CHECK(map_y);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is timed in this test
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpSphericalMaps: times cv::gpu::buildWarpSphericalMaps over a ROI covering the whole image (GPU only).
+
+PERF_TEST_P(Sz, BuildWarpSphericalMaps,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);  // identity K
+    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); // all-ones R; NOTE(review): not a rotation matrix - presumably only meant for timing
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat map_x;
+        cv::gpu::GpuMat map_y;
+
+        TEST_CYCLE() cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y); // scale argument is 1.0
+
+        GPU_SANITY_CHECK(map_x);
+        GPU_SANITY_CHECK(map_y);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is timed in this test
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Rotate: times cv::gpu::rotate for each size / depth / channel count / interpolation combination (GPU only).
+
+DEF_PARAM_TEST(Sz_Depth_Cn_Inter, cv::Size, MatDepth, MatCn, Interpolation);
+
+PERF_TEST_P(Sz_Depth_Cn_Inter, Rotate,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG); // NOTE(review): WARMUP_RNG presumably fills src with random data - confirm
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // uploaded once, outside the timed cycle
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::rotate(d_src, dst, size, 30.0, 0, 0, interpolation); // NOTE(review): 30.0, 0, 0 are presumably angle and x/y shift - confirm against the header
+
+        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is timed in this test
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// PyrDown: times cv::gpu::pyrDown against cv::pyrDown for each size / depth / channel count combination.
+
+PERF_TEST_P(Sz_Depth_Cn, PyrDown,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG); // NOTE(review): WARMUP_RNG presumably fills src with random data - confirm
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // uploaded once, outside the timed cycle
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::pyrDown(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::pyrDown(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// PyrUp: times cv::gpu::pyrUp against cv::pyrUp for each size / depth / channel count combination.
+
+PERF_TEST_P(Sz_Depth_Cn, PyrUp,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG); // NOTE(review): WARMUP_RNG presumably fills src with random data - confirm
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // uploaded once, outside the timed cycle
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::pyrUp(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::pyrUp(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// ImagePyramidBuild: times cv::gpu::ImagePyramid::build with 5 layers (GPU only).
+
+PERF_TEST_P(Sz_Depth_Cn, ImagePyramidBuild,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG); // NOTE(review): WARMUP_RNG presumably fills src with random data - confirm
+
+    const int nLayers = 5;
+    const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10); // half the source size plus 10 pixels per axis
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+
+        cv::gpu::ImagePyramid d_pyr;
+
+        TEST_CYCLE() d_pyr.build(d_src, nLayers); // only build() is inside the timed cycle
+
+        // getLayer runs once, after timing, to produce a matrix for the sanity check.
+        cv::gpu::GpuMat dst;
+        d_pyr.getLayer(dst, dstSize);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is timed in this test
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// ImagePyramidGetLayer: times cv::gpu::ImagePyramid::getLayer on a 3-layer pyramid (GPU only).
+
+PERF_TEST_P(Sz_Depth_Cn, ImagePyramidGetLayer,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG); // NOTE(review): WARMUP_RNG presumably fills src with random data - confirm
+
+    const int nLayers = 3;
+    const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10); // half the source size plus 10 pixels per axis
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        cv::gpu::ImagePyramid d_pyr(d_src, nLayers); // pyramid is constructed once, outside the timed cycle
+
+        TEST_CYCLE() d_pyr.getLayer(dst, dstSize); // only getLayer() is timed
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is timed in this test
+    }
+}
diff --git a/modules/gpuwarping/src/cuda/build_warp_maps.cu b/modules/gpuwarping/src/cuda/build_warp_maps.cu
new file mode 100644
index 000000000..6bd4e335b
--- /dev/null
+++ b/modules/gpuwarping/src/cuda/build_warp_maps.cu
@@ -0,0 +1,221 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/border_interpolate.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        // TODO use intrinsics like __sinf and so on
+
+        namespace build_warp_maps
+        {
+            // Parameters in __constant__ memory, written by the host launchers in this file via cudaMemcpyToSymbol.
+            __constant__ float ck_rinv[9]; // indexed [0..8] by the mappers as a row-major 3x3 matrix
+            __constant__ float cr_kinv[9]; // written by every launcher; no mapper in this file reads it
+            __constant__ float ct[3];      // written only by buildWarpPlaneMaps, read only by PlaneMapper
+            __constant__ float cscale;     // divisor applied to u and v in every mapper
+        }
+
+
+        // Planar backward mapping: (u, v) is divided by cscale, offset by ct, then multiplied by ck_rinv.
+        class PlaneMapper
+        {
+        public:
+            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
+            {
+                using namespace build_warp_maps;
+
+                // Input vector for the 3x3 product; its third component is fixed at 1 - ct[2].
+                const float px = u / cscale - ct[0];
+                const float py = v / cscale - ct[1];
+                const float pz = 1 - ct[2];
+
+                // The third row gives the divisor; its sign is not checked in this mapper.
+                const float w = ck_rinv[6] * px + ck_rinv[7] * py + ck_rinv[8] * pz;
+                x = (ck_rinv[0] * px + ck_rinv[1] * py + ck_rinv[2] * pz) / w;
+                y = (ck_rinv[3] * px + ck_rinv[4] * py + ck_rinv[5] * pz) / w;
+            }
+        };
+
+
+        // Cylindrical backward mapping: u / cscale is used as an angle, v / cscale as the middle component.
+        class CylindricalMapper
+        {
+        public:
+            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
+            {
+                using namespace build_warp_maps;
+
+                const float angle = u / cscale;
+                const float px = ::sinf(angle);
+                const float py = v / cscale;
+                const float pz = ::cosf(angle);
+
+                // Third row of ck_rinv gives the divisor. When it is not positive (or is NaN),
+                // both outputs are set to -1 instead of being divided.
+                const float w = ck_rinv[6] * px + ck_rinv[7] * py + ck_rinv[8] * pz;
+                const bool positive = w > 0;
+                x = positive ? (ck_rinv[0] * px + ck_rinv[1] * py + ck_rinv[2] * pz) / w : -1.0f;
+                y = positive ? (ck_rinv[3] * px + ck_rinv[4] * py + ck_rinv[5] * pz) / w : -1.0f;
+            }
+        };
+
+
+        // Spherical backward mapping: u / cscale and v / cscale are both used as angles to build
+        // a 3-vector from sines and cosines, which is then multiplied by ck_rinv.
+        class SphericalMapper
+        {
+        public:
+            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
+            {
+                using namespace build_warp_maps;
+
+                const float su = u / cscale;
+                const float sv = v / cscale;
+
+                const float sin_sv = ::sinf(sv);
+                const float px = sin_sv * ::sinf(su);
+                const float py = -::cosf(sv);
+                const float pz = sin_sv * ::cosf(su);
+
+                // When the divisor is not positive (or is NaN), both outputs are set to -1.
+                const float w = ck_rinv[6] * px + ck_rinv[7] * py + ck_rinv[8] * pz;
+                const bool positive = w > 0;
+                x = positive ? (ck_rinv[0] * px + ck_rinv[1] * py + ck_rinv[2] * pz) / w : -1.0f;
+                y = positive ? (ck_rinv[3] * px + ck_rinv[4] * py + ck_rinv[5] * pz) / w : -1.0f;
+            }
+        };
+
+
+        // One thread per output element: writes Mapper::mapBackward(tl_u + column, tl_v + row)
+        // into map_x / map_y. Threads outside cols x rows do nothing.
+        template <typename Mapper>
+        __global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,
+                                            PtrStepf map_x, PtrStepf map_y)
+        {
+            const int col = blockIdx.x * blockDim.x + threadIdx.x;
+            const int row = blockIdx.y * blockDim.y + threadIdx.y;
+            if (col >= cols || row >= rows)
+                return;
+
+            float srcX, srcY;
+            Mapper::mapBackward(static_cast<float>(tl_u + col), static_cast<float>(tl_v + row), srcX, srcY);
+            map_x.ptr(row)[col] = srcX;
+            map_y.ptr(row)[col] = srcY;
+        }
+
+
+        // Uploads the mapper constants, then launches buildWarpMapsKernel<PlaneMapper> over map_x.cols x map_x.rows.
+        // The kernel runs on `stream`; the device is synchronized here only when stream == 0.
+        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                const float k_rinv[9], const float r_kinv[9], const float t[3],
+                                float scale, cudaStream_t stream)
+        {
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
+            int cols = map_x.cols;
+            int rows = map_x.rows;
+            dim3 threads(32, 8);
+            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
+            // Pass `stream` to the launch; without it the kernel would always run on the default stream.
+            buildWarpMapsKernel<PlaneMapper><<<grid, threads, 0, stream>>>(tl_u, tl_v, cols, rows, map_x, map_y);
+            cudaSafeCall(cudaGetLastError());
+            if (stream == 0)
+                cudaSafeCall(cudaDeviceSynchronize());
+        }
+
+
+        // Uploads the mapper constants, then launches buildWarpMapsKernel<CylindricalMapper> over map_x.cols x map_x.rows.
+        // The kernel runs on `stream`; the device is synchronized here only when stream == 0.
+        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                      const float k_rinv[9], const float r_kinv[9], float scale,
+                                      cudaStream_t stream)
+        {
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
+            int cols = map_x.cols;
+            int rows = map_x.rows;
+            dim3 threads(32, 8);
+            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
+            // Pass `stream` to the launch; without it the kernel would always run on the default stream.
+            buildWarpMapsKernel<CylindricalMapper><<<grid, threads, 0, stream>>>(tl_u, tl_v, cols, rows, map_x, map_y);
+            cudaSafeCall(cudaGetLastError());
+            if (stream == 0)
+                cudaSafeCall(cudaDeviceSynchronize());
+        }
+
+
+        // Uploads the mapper constants, then launches buildWarpMapsKernel<SphericalMapper> over map_x.cols x map_x.rows.
+        // The kernel runs on `stream`; the device is synchronized here only when stream == 0.
+        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                    const float k_rinv[9], const float r_kinv[9], float scale,
+                                    cudaStream_t stream)
+        {
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
+            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
+            int cols = map_x.cols;
+            int rows = map_x.rows;
+            dim3 threads(32, 8);
+            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
+            // Pass `stream` to the launch; without it the kernel would always run on the default stream.
+            buildWarpMapsKernel<SphericalMapper><<<grid, threads, 0, stream>>>(tl_u, tl_v, cols, rows, map_x, map_y);
+            cudaSafeCall(cudaGetLastError());
+            if (stream == 0)
+                cudaSafeCall(cudaDeviceSynchronize());
+        }
+    } // namespace imgproc
+}}} // namespace cv { namespace gpu { namespace cudev {
+
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/pyr_down.cu b/modules/gpuwarping/src/cuda/pyr_down.cu
similarity index 100%
rename from modules/gpuimgproc/src/cuda/pyr_down.cu
rename to modules/gpuwarping/src/cuda/pyr_down.cu
diff --git a/modules/gpuimgproc/src/cuda/pyr_up.cu b/modules/gpuwarping/src/cuda/pyr_up.cu
similarity index 100%
rename from modules/gpuimgproc/src/cuda/pyr_up.cu
rename to modules/gpuwarping/src/cuda/pyr_up.cu
diff --git a/modules/gpuimgproc/src/cuda/remap.cu b/modules/gpuwarping/src/cuda/remap.cu
similarity index 100%
rename from modules/gpuimgproc/src/cuda/remap.cu
rename to modules/gpuwarping/src/cuda/remap.cu
diff --git a/modules/gpuimgproc/src/cuda/resize.cu b/modules/gpuwarping/src/cuda/resize.cu
similarity index 100%
rename from modules/gpuimgproc/src/cuda/resize.cu
rename to modules/gpuwarping/src/cuda/resize.cu
diff --git a/modules/gpuimgproc/src/cuda/warp.cu b/modules/gpuwarping/src/cuda/warp.cu
similarity index 100%
rename from modules/gpuimgproc/src/cuda/warp.cu
rename to modules/gpuwarping/src/cuda/warp.cu
diff --git a/modules/gpuwarping/src/precomp.cpp b/modules/gpuwarping/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpuwarping/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpuwarping/src/precomp.hpp b/modules/gpuwarping/src/precomp.hpp
new file mode 100644
index 000000000..52cc69a34
--- /dev/null
+++ b/modules/gpuwarping/src/precomp.hpp
@@ -0,0 +1,57 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include "opencv2/gpuwarping.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPULEGACY
+#  include "opencv2/gpulegacy.hpp"
+#  include "opencv2/gpulegacy/private.hpp"
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuimgproc/src/pyramids.cpp b/modules/gpuwarping/src/pyramids.cpp
similarity index 97%
rename from modules/gpuimgproc/src/pyramids.cpp
rename to modules/gpuwarping/src/pyramids.cpp
index 9e9fbe343..91b568d70 100644
--- a/modules/gpuimgproc/src/pyramids.cpp
+++ b/modules/gpuwarping/src/pyramids.cpp
@@ -45,7 +45,9 @@
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
 void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::ImagePyramid::build(const GpuMat&, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::ImagePyramid::getLayer(GpuMat&, Size, Stream&) const { throw_no_cuda(); }
 
@@ -130,17 +132,14 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
 //////////////////////////////////////////////////////////////////////////////
 // ImagePyramid
 
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace pyramid
-    {
-        template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-    }
-}}}
-
 void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stream)
 {
+#ifndef HAVE_OPENCV_GPULEGACY
+    (void) img;
+    (void) numLayers;
+    (void) stream;
+    throw_no_cuda();
+#else
     using namespace cv::gpu::cudev::pyramid;
 
     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -185,10 +184,17 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre
 
         szLastLayer = szCurLayer;
     }
+#endif
 }
 
 void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream) const
 {
+#ifndef HAVE_OPENCV_GPULEGACY
+    (void) outImg;
+    (void) outRoi;
+    (void) stream;
+    throw_no_cuda();
+#else
     using namespace cv::gpu::cudev::pyramid;
 
     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -244,6 +250,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
     }
 
     func(lastLayer, outImg, StreamAccessor::getStream(stream));
+#endif
 }
 
 #endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/src/remap.cpp b/modules/gpuwarping/src/remap.cpp
similarity index 100%
rename from modules/gpuimgproc/src/remap.cpp
rename to modules/gpuwarping/src/remap.cpp
diff --git a/modules/gpuimgproc/src/resize.cpp b/modules/gpuwarping/src/resize.cpp
similarity index 96%
rename from modules/gpuimgproc/src/resize.cpp
rename to modules/gpuwarping/src/resize.cpp
index 32afa54de..68708b41a 100644
--- a/modules/gpuimgproc/src/resize.cpp
+++ b/modules/gpuwarping/src/resize.cpp
@@ -44,18 +44,7 @@
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
-{
-    (void)src;
-    (void)dst;
-    (void)dsize;
-    (void)fx;
-    (void)fy;
-    (void)interpolation;
-    (void)s;
-
-    throw_no_cuda();
-}
+void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
diff --git a/modules/gpuimgproc/src/warp.cpp b/modules/gpuwarping/src/warp.cpp
similarity index 70%
rename from modules/gpuimgproc/src/warp.cpp
rename to modules/gpuwarping/src/warp.cpp
index 007091e6a..e15c11b74 100644
--- a/modules/gpuimgproc/src/warp.cpp
+++ b/modules/gpuwarping/src/warp.cpp
@@ -42,8 +42,10 @@
 
 #include "precomp.hpp"
 
-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+using namespace cv;
+using namespace cv::gpu;
 
+#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
 void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpAffineMaps(const Mat&, bool, Size, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
@@ -51,6 +53,12 @@ void cv::gpu::buildWarpAffineMaps(const Mat&, bool, Size, GpuMat&, GpuMat&, Stre
 void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
 void cv::gpu::buildWarpPerspectiveMaps(const Mat&, bool, Size, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
+void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
+
 #else // HAVE_CUDA
 
 namespace cv { namespace gpu { namespace cudev
@@ -121,27 +129,18 @@ void cv::gpu::buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, G
 
 namespace
 {
-    template<int DEPTH> struct NppTypeTraits;
-    template<> struct NppTypeTraits<CV_8U>  { typedef Npp8u npp_t; };
-    template<> struct NppTypeTraits<CV_8S>  { typedef Npp8s npp_t; };
-    template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
-    template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; typedef Npp16sc npp_complex_type; };
-    template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; typedef Npp32sc npp_complex_type; };
-    template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; typedef Npp32fc npp_complex_type; };
-    template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; typedef Npp64fc npp_complex_type; };
-
     template <int DEPTH> struct NppWarpFunc
     {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
 
-        typedef NppStatus (*func_t)(const npp_t* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_t* pDst,
+        typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
                                     int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                     int interpolation);
     };
 
     template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
     {
-        typedef typename NppWarpFunc<DEPTH>::npp_t npp_t;
+        typedef typename NppWarpFunc<DEPTH>::npp_type npp_type;
 
         static void call(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
         {
@@ -165,8 +164,8 @@ namespace
 
             cv::gpu::NppStreamHandler h(stream);
 
-            nppSafeCall( func(src.ptr<npp_t>(), srcsz, static_cast<int>(src.step), srcroi,
-                              dst.ptr<npp_t>(), static_cast<int>(dst.step), dstroi,
+            nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
+                              dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi,
                               coeffs, npp_inter[interpolation]) );
 
             if (stream == 0)
@@ -451,4 +450,173 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
     }
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// buildWarpPlaneMaps
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {   // Declaration only: no definition of this function appears in this hunk.
+        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
+                                cudaStream_t stream); // [9]/[3] are documentation only; array parameters decay to pointers
+    }
+}}}
+
+void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T,
+                                 float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)
+{   // Validates K, R and T on the host, allocates map_x / map_y, and forwards everything to the cudev wrapper.
+    (void)src_size; // accepted but not used by this implementation
+    using namespace ::cv::gpu::cudev::imgproc;
+    // K and R must be 3x3 CV_32F; T must be a continuous 3-element CV_32F row or column.
+    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
+    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
+    CV_Assert((T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32F && T.isContinuous());
+    // "Rinv" is computed as R.t(); presumably R is a rotation so its transpose is its inverse - not checked here.
+    Mat K_Rinv = K * R.t();
+    Mat R_Kinv = R * K.inv();
+    CV_Assert(K_Rinv.isContinuous());
+    CV_Assert(R_Kinv.isContinuous());
+    // Continuity matters because the matrices are passed on below as raw float pointers.
+    map_x.create(dst_roi.size(), CV_32F);
+    map_y.create(dst_roi.size(), CV_32F);
+    cudev::imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
+                       T.ptr<float>(), scale, StreamAccessor::getStream(stream));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// buildWarpCylindricalMaps
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {   // Declaration only: no definition of this function appears in this hunk.
+        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                      const float k_rinv[9], const float r_kinv[9], float scale,
+                                      cudaStream_t stream); // same parameters as the plane variant minus the translation t
+    }
+}}}
+
+void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
+                                       GpuMat& map_x, GpuMat& map_y, Stream& stream)
+{   // Validates K and R on the host, allocates map_x / map_y, and forwards everything to the cudev wrapper.
+    (void)src_size; // accepted but not used by this implementation
+    using namespace ::cv::gpu::cudev::imgproc;
+    // K and R must both be 3x3 CV_32F.
+    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
+    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
+    // "Rinv" is computed as R.t(); presumably R is a rotation so its transpose is its inverse - not checked here.
+    Mat K_Rinv = K * R.t();
+    Mat R_Kinv = R * K.inv();
+    CV_Assert(K_Rinv.isContinuous());
+    CV_Assert(R_Kinv.isContinuous());
+    // Continuity matters because the matrices are passed on below as raw float pointers.
+    map_x.create(dst_roi.size(), CV_32F);
+    map_y.create(dst_roi.size(), CV_32F);
+    cudev::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// buildWarpSphericalMaps
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {   // Declaration only: no definition of this function appears in this hunk.
+        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                    const float k_rinv[9], const float r_kinv[9], float scale,
+                                    cudaStream_t stream); // same parameter list as the cylindrical variant
+    }
+}}}
+
+void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
+                                     GpuMat& map_x, GpuMat& map_y, Stream& stream)
+{   // Validates K and R on the host, allocates map_x / map_y, and forwards everything to the cudev wrapper.
+    (void)src_size; // accepted but not used by this implementation
+    using namespace ::cv::gpu::cudev::imgproc;
+    // K and R must both be 3x3 CV_32F.
+    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
+    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
+    // "Rinv" is computed as R.t(); presumably R is a rotation so its transpose is its inverse - not checked here.
+    Mat K_Rinv = K * R.t();
+    Mat R_Kinv = R * K.inv();
+    CV_Assert(K_Rinv.isContinuous());
+    CV_Assert(R_Kinv.isContinuous());
+    // Continuity matters because the matrices are passed on below as raw float pointers.
+    map_x.create(dst_roi.size(), CV_32F);
+    map_y.create(dst_roi.size(), CV_32F);
+    cudev::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
+}
+
+////////////////////////////////////////////////////////////////////////
+// rotate
+
+namespace
+{   // File-local adapters that give the NPP rotate entry points used by cv::gpu::rotate one common call signature.
+    template <int DEPTH> struct NppRotateFunc
+    {
+        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
+        // Function-pointer type for an NPP rotate routine operating on npp_type pixels.
+        typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI,
+                                    npp_type* pDst, int nDstStep, NppiRect oDstROI,
+                                    double nAngle, double nShiftX, double nShiftY, int eInterpolation);
+    };
+    // Binds one concrete NPP function (template argument `func`) and exposes it through a static call().
+    template <int DEPTH, typename NppRotateFunc<DEPTH>::func_t func> struct NppRotate
+    {
+        typedef typename NppRotateFunc<DEPTH>::npp_type npp_type;
+
+        static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
+        {
+            (void)dsize; // unused: the destination ROI below is taken from dst itself
+            static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC}; // indexed by `interpolation`; assumes INTER_NEAREST/LINEAR/CUBIC are 0/1/2 - TODO confirm
+
+            NppStreamHandler h(stream); // defined elsewhere; presumably selects `stream` for NPP during this scope - confirm
+            // Source size and ROI both cover the whole src image; the destination ROI covers the whole dst image.
+            NppiSize srcsz;
+            srcsz.height = src.rows;
+            srcsz.width = src.cols;
+            NppiRect srcroi;
+            srcroi.x = srcroi.y = 0;
+            srcroi.height = src.rows;
+            srcroi.width = src.cols;
+            NppiRect dstroi;
+            dstroi.x = dstroi.y = 0;
+            dstroi.height = dst.rows;
+            dstroi.width = dst.cols;
+
+            nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
+                dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
+            // With the default (0) stream the call synchronizes the device before returning.
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+}
+
+void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
+{   // Allocates and zero-fills dst (dsize, same type as src), then dispatches to the NppRotate adapter for src's depth/channels.
+    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
+    // Dispatch table: row = src.depth(), column = src.channels() - 1; a 0 entry means the combination is unsupported.
+    static const func_t funcs[6][4] =
+    {
+        {NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
+        {0,0,0,0},
+        {NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
+        {0,0,0,0},
+        {0,0,0,0},
+        {NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
+    };
+    // These asserts admit exactly the depth/channel pairs with a non-null table entry, and the three interpolation modes NppRotate maps.
+    CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
+    CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
+    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
+
+    dst.create(dsize, src.type());
+    dst.setTo(Scalar::all(0)); // NOTE(review): the zero-fill is not given `stream`; confirm its ordering relative to the rotate issued below
+
+    funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
+}
+
 #endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/interpolation.hpp b/modules/gpuwarping/test/interpolation.hpp
similarity index 100%
rename from modules/gpuimgproc/test/interpolation.hpp
rename to modules/gpuwarping/test/interpolation.hpp
diff --git a/modules/gpuwarping/test/test_main.cpp b/modules/gpuwarping/test/test_main.cpp
new file mode 100644
index 000000000..eea3d7c00
--- /dev/null
+++ b/modules/gpuwarping/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpuwarping/test/test_precomp.cpp b/modules/gpuwarping/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpuwarping/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpuwarping/test/test_precomp.hpp b/modules/gpuwarping/test/test_precomp.hpp
new file mode 100644
index 000000000..90a28fe1a
--- /dev/null
+++ b/modules/gpuwarping/test/test_precomp.hpp
@@ -0,0 +1,62 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif // __clang__ || __APPLE__
+#endif // __GNUC__
+// Note: the diagnostic pragmas above sit outside the include guard, so they are applied on every inclusion.
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+// Module under test, plus imgproc (presumably the CPU reference for the accuracy tests - confirm).
+#include "opencv2/gpuwarping.hpp"
+#include "opencv2/imgproc.hpp"
+
+#include "interpolation.hpp"
+
+#endif // __OPENCV_TEST_PRECOMP_HPP__
diff --git a/modules/gpuimgproc/test/test_pyramids.cpp b/modules/gpuwarping/test/test_pyramids.cpp
similarity index 97%
rename from modules/gpuimgproc/test/test_pyramids.cpp
rename to modules/gpuwarping/test/test_pyramids.cpp
index 6b0540fc1..f296b7d6f 100644
--- a/modules/gpuimgproc/test/test_pyramids.cpp
+++ b/modules/gpuwarping/test/test_pyramids.cpp
@@ -80,7 +80,7 @@ GPU_TEST_P(PyrDown, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-4 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, PyrDown, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
@@ -120,7 +120,7 @@ GPU_TEST_P(PyrUp, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-4 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, PyrUp, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
diff --git a/modules/gpuimgproc/test/test_remap.cpp b/modules/gpuwarping/test/test_remap.cpp
similarity index 99%
rename from modules/gpuimgproc/test/test_remap.cpp
rename to modules/gpuwarping/test/test_remap.cpp
index eb4b9ece8..c1899ff61 100644
--- a/modules/gpuimgproc/test/test_remap.cpp
+++ b/modules/gpuwarping/test/test_remap.cpp
@@ -169,7 +169,7 @@ GPU_TEST_P(Remap, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, Remap, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
diff --git a/modules/gpuimgproc/test/test_resize.cpp b/modules/gpuwarping/test/test_resize.cpp
similarity index 97%
rename from modules/gpuimgproc/test/test_resize.cpp
rename to modules/gpuwarping/test/test_resize.cpp
index 593c891e6..27289834a 100644
--- a/modules/gpuimgproc/test/test_resize.cpp
+++ b/modules/gpuwarping/test/test_resize.cpp
@@ -152,7 +152,7 @@ GPU_TEST_P(Resize, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, Resize, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
@@ -198,7 +198,7 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, ResizeSameAsHost, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
@@ -241,7 +241,7 @@ GPU_TEST_P(ResizeNPP, Accuracy)
     EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-1);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeNPP, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, ResizeNPP, testing::Combine(
     ALL_DEVICES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
     testing::Values(0.3, 0.5, 1.5, 2.0),
diff --git a/modules/gpuimgproc/test/test_warp_affine.cpp b/modules/gpuwarping/test/test_warp_affine.cpp
similarity index 98%
rename from modules/gpuimgproc/test/test_warp_affine.cpp
rename to modules/gpuwarping/test/test_warp_affine.cpp
index 43bf0f6d9..206446c4d 100644
--- a/modules/gpuimgproc/test/test_warp_affine.cpp
+++ b/modules/gpuwarping/test/test_warp_affine.cpp
@@ -101,7 +101,7 @@ GPU_TEST_P(BuildWarpAffineMaps, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, BuildWarpAffineMaps, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, BuildWarpAffineMaps, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DIRECT_INVERSE));
@@ -222,7 +222,7 @@ GPU_TEST_P(WarpAffine, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-1 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffine, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, WarpAffine, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
@@ -271,7 +271,7 @@ GPU_TEST_P(WarpAffineNPP, Accuracy)
     EXPECT_MAT_SIMILAR(dst_gold, dst, 2e-2);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffineNPP, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, WarpAffineNPP, testing::Combine(
     ALL_DEVICES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
     DIRECT_INVERSE,
diff --git a/modules/gpuimgproc/test/test_warp_perspective.cpp b/modules/gpuwarping/test/test_warp_perspective.cpp
similarity index 98%
rename from modules/gpuimgproc/test/test_warp_perspective.cpp
rename to modules/gpuwarping/test/test_warp_perspective.cpp
index d225e58b6..49f844c3f 100644
--- a/modules/gpuimgproc/test/test_warp_perspective.cpp
+++ b/modules/gpuwarping/test/test_warp_perspective.cpp
@@ -102,7 +102,7 @@ GPU_TEST_P(BuildWarpPerspectiveMaps, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, BuildWarpPerspectiveMaps, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, BuildWarpPerspectiveMaps, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     DIRECT_INVERSE));
@@ -225,7 +225,7 @@ GPU_TEST_P(WarpPerspective, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-1 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspective, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, WarpPerspective, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
@@ -274,7 +274,7 @@ GPU_TEST_P(WarpPerspectiveNPP, Accuracy)
     EXPECT_MAT_SIMILAR(dst_gold, dst, 2e-2);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspectiveNPP, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Warping, WarpPerspectiveNPP, testing::Combine(
     ALL_DEVICES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
     DIRECT_INVERSE,
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index e90bcb6ac..d1807e929 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -19,6 +19,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
   if(HAVE_opencv_gpu)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuarithm/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufilters/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuwarping/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuvideo/include")
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index d69ccdc2e..670c71c9e 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,8 +2,9 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpulegacy opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpuobjdetect
-                                     opencv_gpucalib3d)
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
+                                     opencv_gpufeatures2d opencv_gpuvideo opencv_gpuobjdetect
+                                     opencv_gpucalib3d opencv_gpulegacy)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 

From e72ba73e708537686218bfdd35a1214a0ccdd9b4 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 18:20:18 +0400
Subject: [PATCH 27/49] moved gpu nlm to photo module

---
 modules/gpuimgproc/CMakeLists.txt             |   2 +-
 modules/gpuimgproc/doc/image_processing.rst   | 100 +-----------------
 .../gpuimgproc/include/opencv2/gpuimgproc.hpp |  19 ----
 modules/gpuimgproc/perf/perf_imgproc.cpp      |  46 ++++++++
 modules/gpuimgproc/perf/perf_precomp.hpp      |   2 -
 modules/gpuimgproc/src/bilateral_filter.cpp   |  99 +++++++++++++++++
 modules/gpuimgproc/test/test_imgproc.cpp      |   2 +-
 modules/photo/CMakeLists.txt                  |   2 +-
 modules/photo/doc/denoising.rst               |  99 +++++++++++++++++
 modules/photo/include/opencv2/photo/gpu.hpp   |  71 +++++++++++++
 .../perf/perf_gpu.cpp}                        |  49 +--------
 modules/{gpuimgproc => photo}/src/cuda/nlm.cu |   0
 .../src/denoising_gpu.cpp}                    |  52 +--------
 .../test/test_denoising_gpu.cpp}              |  34 +-----
 14 files changed, 332 insertions(+), 245 deletions(-)
 create mode 100644 modules/gpuimgproc/src/bilateral_filter.cpp
 create mode 100644 modules/photo/include/opencv2/photo/gpu.hpp
 rename modules/{gpuimgproc/perf/perf_denoising.cpp => photo/perf/perf_gpu.cpp} (82%)
 rename modules/{gpuimgproc => photo}/src/cuda/nlm.cu (100%)
 rename modules/{gpuimgproc/src/denoising.cpp => photo/src/denoising_gpu.cpp} (72%)
 rename modules/{gpuimgproc/test/test_denoising.cpp => photo/test/test_denoising_gpu.cpp} (85%)

diff --git a/modules/gpuimgproc/CMakeLists.txt b/modules/gpuimgproc/CMakeLists.txt
index 04a31d5e7..19a66dca9 100644
--- a/modules/gpuimgproc/CMakeLists.txt
+++ b/modules/gpuimgproc/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Image Processing")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
-ocv_define_module(gpuimgproc opencv_imgproc opencv_gpuarithm opencv_gpufilters OPTIONAL opencv_photo)
+ocv_define_module(gpuimgproc opencv_imgproc opencv_gpuarithm opencv_gpufilters)
diff --git a/modules/gpuimgproc/doc/image_processing.rst b/modules/gpuimgproc/doc/image_processing.rst
index 52d7b92ad..fed3aaf25 100644
--- a/modules/gpuimgproc/doc/image_processing.rst
+++ b/modules/gpuimgproc/doc/image_processing.rst
@@ -397,105 +397,7 @@ Performs bilateral filtering of passed image
 
 .. seealso::
 
-    :ocv:func:`bilateralFilter`,
-
-
-gpu::nonLocalMeans
--------------------
-Performs pure non local means denoising without any simplification, and thus it is not fast.
-
-.. ocv:function:: void gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null())
-
-    :param src: Source image. Supports only CV_8UC1, CV_8UC2 and CV_8UC3.
-
-    :param dst: Destination image.
-
-    :param h: Filter sigma regulating filter strength for color.
-
-    :param search_window: Size of search window.
-
-    :param block_size: Size of block used for computing weights.
-
-    :param borderMode:  Border type. See :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso::
-
-    :ocv:func:`fastNlMeansDenoising`
-
-
-
-gpu::FastNonLocalMeansDenoising
--------------------------------
-.. ocv:class:: gpu::FastNonLocalMeansDenoising
-
-    ::
-
-        class FastNonLocalMeansDenoising
-        {
-        public:
-            //! Simple method, recommended for grayscale images (though it supports multichannel images)
-            void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null())
-            //! Processes luminance and color components separatelly
-            void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null())
-        };
-
-The class implements fast approximate Non Local Means Denoising algorithm.
-
-
-
-gpu::FastNonLocalMeansDenoising::simpleMethod()
------------------------------------------------
-Perform image denoising using Non-local Means Denoising algorithm http://www.ipol.im/pub/algo/bcm_non_local_means_denoising with several computational optimizations. Noise expected to be a gaussian white noise
-
-.. ocv:function:: void gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null())
-
-    :param src: Input 8-bit 1-channel, 2-channel or 3-channel image.
-
-    :param dst: Output image with the same size and type as  ``src`` .
-
-    :param h: Parameter regulating filter strength. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise
-
-    :param search_window: Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater search_window - greater denoising time. Recommended value 21 pixels
-
-    :param block_size: Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels
-
-    :param stream: Stream for the asynchronous invocations.
-
-This function expected to be applied to grayscale images. For colored images look at ``FastNonLocalMeansDenoising::labMethod``.
-
-.. seealso::
-
-    :ocv:func:`fastNlMeansDenoising`
-
-
-
-gpu::FastNonLocalMeansDenoising::labMethod()
---------------------------------------------
-Modification of ``FastNonLocalMeansDenoising::simpleMethod`` for color images
-
-.. ocv:function:: void gpu::FastNonLocalMeansDenoising::labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null())
-
-    :param src: Input 8-bit 3-channel image.
-
-    :param dst: Output image with the same size and type as  ``src`` .
-
-    :param h_luminance: Parameter regulating filter strength. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise
-
-    :param float: The same as h but for color components. For most images value equals 10 will be enought to remove colored noise and do not distort colors
-
-    :param search_window: Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater search_window - greater denoising time. Recommended value 21 pixels
-
-    :param block_size: Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels
-
-    :param stream: Stream for the asynchronous invocations.
-
-The function converts image to CIELAB colorspace and then separately denoise L and AB components with given h parameters using ``FastNonLocalMeansDenoising::simpleMethod`` function.
-
-.. seealso::
-
-    :ocv:func:`fastNlMeansDenoisingColored`
+    :ocv:func:`bilateralFilter`
 
 
 
diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
index 00ef0a3e9..8d1533446 100644
--- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
@@ -143,25 +143,6 @@ CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat
 CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial,
                                 int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null());
 
-//! Brute force non-local means algorith (slow but universal)
-CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
-
-//! Fast (but approximate)version of non-local means algorith similar to CPU function (running sums technique)
-class CV_EXPORTS FastNonLocalMeansDenoising
-{
-public:
-    //! Simple method, recommended for grayscale images (though it supports multichannel images)
-    void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
-
-    //! Processes luminance and color components separatelly
-    void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
-
-private:
-
-    GpuMat buffer, extended_src_buffer;
-    GpuMat lab, l, ab;
-};
-
 struct CV_EXPORTS CannyBuf
 {
     void create(const Size& image_size, int apperture_size = 3);
diff --git a/modules/gpuimgproc/perf/perf_imgproc.cpp b/modules/gpuimgproc/perf/perf_imgproc.cpp
index ff19e14ed..73e298855 100644
--- a/modules/gpuimgproc/perf/perf_imgproc.cpp
+++ b/modules/gpuimgproc/perf/perf_imgproc.cpp
@@ -1046,3 +1046,49 @@ PERF_TEST_P(Method_Sz, GeneralizedHough,
         CPU_SANITY_CHECK(positions);
     }
 }
+
+//////////////////////////////////////////////////////////////////////
+// BilateralFilter
+
+DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int);
+
+PERF_TEST_P(Sz_Depth_Cn_KernelSz, BilateralFilter,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_32F),
+                    GPU_CHANNELS_1_3,
+                    Values(3, 5, 9)))
+{
+    declare.time(60.0);
+
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int kernel_size = GET_PARAM(3);
+
+    const float sigma_color = 7;
+    const float sigma_spatial = 5;
+    const int borderMode = cv::BORDER_REFLECT101;
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_precomp.hpp b/modules/gpuimgproc/perf/perf_precomp.hpp
index 6ecb958f4..adaa4ebb7 100644
--- a/modules/gpuimgproc/perf/perf_precomp.hpp
+++ b/modules/gpuimgproc/perf/perf_precomp.hpp
@@ -57,8 +57,6 @@
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/imgproc.hpp"
 
-#include "opencv2/photo.hpp"
-
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
 #endif
diff --git a/modules/gpuimgproc/src/bilateral_filter.cpp b/modules/gpuimgproc/src/bilateral_filter.cpp
new file mode 100644
index 000000000..0c14987d4
--- /dev/null
+++ b/modules/gpuimgproc/src/bilateral_filter.cpp
@@ -0,0 +1,99 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+void cv::gpu::bilateralFilter(const GpuMat&, GpuMat&, int, float, float, int, Stream&) { throw_no_cuda(); }
+
+#else
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        template<typename T>
+        void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t stream);
+    }
+}}}
+
+void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& s)
+{
+    using cv::gpu::cudev::imgproc::bilateral_filter_gpu;
+    // NOTE: the kernel entry point takes sigma_spatial BEFORE sigma_color, the reverse of this function's parameter order.
+    typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s);
+    // Dispatch table indexed as funcs[depth][channels - 1]; a 0 entry marks a depth/channel combination with no kernel.
+    static const func_t funcs[6][4] =
+    {
+        {bilateral_filter_gpu<uchar>      , 0 /*bilateral_filter_gpu<uchar2>*/ , bilateral_filter_gpu<uchar3>      , bilateral_filter_gpu<uchar4>      },
+        {0 /*bilateral_filter_gpu<schar>*/, 0 /*bilateral_filter_gpu<schar2>*/ , 0 /*bilateral_filter_gpu<schar3>*/, 0 /*bilateral_filter_gpu<schar4>*/},
+        {bilateral_filter_gpu<ushort>     , 0 /*bilateral_filter_gpu<ushort2>*/, bilateral_filter_gpu<ushort3>     , bilateral_filter_gpu<ushort4>     },
+        {bilateral_filter_gpu<short>      , 0 /*bilateral_filter_gpu<short2>*/ , bilateral_filter_gpu<short3>      , bilateral_filter_gpu<short4>      },
+        {0 /*bilateral_filter_gpu<int>*/  , 0 /*bilateral_filter_gpu<int2>*/   , 0 /*bilateral_filter_gpu<int3>*/  , 0 /*bilateral_filter_gpu<int4>*/  },
+        {bilateral_filter_gpu<float>      , 0 /*bilateral_filter_gpu<float2>*/ , bilateral_filter_gpu<float3>      , bilateral_filter_gpu<float4>      }
+    };
+    // Non-positive sigmas fall back to 1.
+    sigma_color = (sigma_color <= 0 ) ? 1 : sigma_color;
+    sigma_spatial = (sigma_spatial <= 0 ) ? 1 : sigma_spatial;
+
+    // kernel_size <= 0 derives the radius from sigma_spatial; the final kernel_size is always odd and at least 3.
+    int radius = (kernel_size <= 0) ? cvRound(sigma_spatial*1.5) : kernel_size/2;
+    kernel_size = std::max(radius, 1)*2 + 1;
+    // Select the kernel for this depth/channel count; depths above CV_32F and 0 table entries fail the assertions.
+    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
+    const func_t func = funcs[src.depth()][src.channels() - 1];
+    CV_Assert(func != 0);
+
+    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
+
+    int gpuBorderType;
+    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
+    // dst is (re)allocated with src's size and type; sigmas are passed in the kernel's order (spatial, then color).
+    dst.create(src.size(), src.type());
+    func(src, dst, kernel_size, sigma_spatial, sigma_color, gpuBorderType, StreamAccessor::getStream(s));
+}
+
+#endif
diff --git a/modules/gpuimgproc/test/test_imgproc.cpp b/modules/gpuimgproc/test/test_imgproc.cpp
index 0e66fe03d..0fa1d0ffa 100644
--- a/modules/gpuimgproc/test/test_imgproc.cpp
+++ b/modules/gpuimgproc/test/test_imgproc.cpp
@@ -881,7 +881,7 @@ GPU_TEST_P(BilateralFilter, Accuracy)
     EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Denoising, BilateralFilter, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, BilateralFilter, testing::Combine(
     ALL_DEVICES,
     testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(639, 481)),
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_32FC1), MatType(CV_32FC3))
diff --git a/modules/photo/CMakeLists.txt b/modules/photo/CMakeLists.txt
index 08a72ea92..d36651201 100644
--- a/modules/photo/CMakeLists.txt
+++ b/modules/photo/CMakeLists.txt
@@ -1,2 +1,2 @@
 set(the_description "Computational Photography")
-ocv_define_module(photo opencv_imgproc)
+ocv_define_module(photo opencv_imgproc OPTIONAL opencv_gpuimgproc)
diff --git a/modules/photo/doc/denoising.rst b/modules/photo/doc/denoising.rst
index 97625d3b3..5a4d7448c 100644
--- a/modules/photo/doc/denoising.rst
+++ b/modules/photo/doc/denoising.rst
@@ -89,3 +89,102 @@ Modification of ``fastNlMeansDenoisingMulti`` function for colored images sequen
 
 The function converts images to CIELAB colorspace and then separately denoise L and AB components with given h parameters using ``fastNlMeansDenoisingMulti`` function.
 
+
+
+gpu::nonLocalMeans
+-------------------
+Performs pure non local means denoising without any simplification, and thus it is not fast.
+
+.. ocv:function:: void gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null())
+
+    :param src: Source image. Supports only CV_8UC1, CV_8UC2 and CV_8UC3.
+
+    :param dst: Destination image.
+
+    :param h: Filter sigma regulating filter strength for color.
+
+    :param search_window: Size of search window.
+
+    :param block_size: Size of block used for computing weights.
+
+    :param borderMode:  Border type. See :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso::
+
+    :ocv:func:`fastNlMeansDenoising`
+
+
+
+gpu::FastNonLocalMeansDenoising
+-------------------------------
+.. ocv:class:: gpu::FastNonLocalMeansDenoising
+
+    ::
+
+        class FastNonLocalMeansDenoising
+        {
+        public:
+            //! Simple method, recommended for grayscale images (though it supports multichannel images)
+            void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null())
+            //! Processes luminance and color components separately
+            void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null())
+        };
+
+The class implements fast approximate Non Local Means Denoising algorithm.
+
+
+
+gpu::FastNonLocalMeansDenoising::simpleMethod()
+-----------------------------------------------
+Performs image denoising using the Non-local Means Denoising algorithm http://www.ipol.im/pub/algo/bcm_non_local_means_denoising with several computational optimizations. The noise is expected to be Gaussian white noise.
+
+.. ocv:function:: void gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null())
+
+    :param src: Input 8-bit 1-channel, 2-channel or 3-channel image.
+
+    :param dst: Output image with the same size and type as  ``src`` .
+
+    :param h: Parameter regulating filter strength. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise
+
+    :param search_window: Size in pixels of the window that is used to compute the weighted average for a given pixel. Should be odd. Affects performance linearly: the greater the search_window, the greater the denoising time. Recommended value is 21 pixels.
+
+    :param block_size: Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels
+
+    :param stream: Stream for the asynchronous invocations.
+
+This function is expected to be applied to grayscale images. For color images, see ``FastNonLocalMeansDenoising::labMethod``.
+
+.. seealso::
+
+    :ocv:func:`fastNlMeansDenoising`
+
+
+
+gpu::FastNonLocalMeansDenoising::labMethod()
+--------------------------------------------
+Modification of ``FastNonLocalMeansDenoising::simpleMethod`` for color images
+
+.. ocv:function:: void gpu::FastNonLocalMeansDenoising::labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null())
+
+    :param src: Input 8-bit 3-channel image.
+
+    :param dst: Output image with the same size and type as  ``src`` .
+
+    :param h_luminance: Parameter regulating filter strength. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise
+
+    :param h_color: The same as ``h_luminance`` but for color components. For most images a value of 10 is enough to remove colored noise without distorting colors.
+
+    :param search_window: Size in pixels of the window that is used to compute the weighted average for a given pixel. Should be odd. Affects performance linearly: the greater the search_window, the greater the denoising time. Recommended value is 21 pixels.
+
+    :param block_size: Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels
+
+    :param stream: Stream for the asynchronous invocations.
+
+The function converts the image to the CIELAB colorspace and then separately denoises the L and AB components with the given h parameters using the ``FastNonLocalMeansDenoising::simpleMethod`` function.
+
+.. seealso::
+
+    :ocv:func:`fastNlMeansDenoisingColored`
+
diff --git a/modules/photo/include/opencv2/photo/gpu.hpp b/modules/photo/include/opencv2/photo/gpu.hpp
new file mode 100644
index 000000000..8a018b567
--- /dev/null
+++ b/modules/photo/include/opencv2/photo/gpu.hpp
@@ -0,0 +1,71 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PHOTO_GPU_HPP__
+#define __OPENCV_PHOTO_GPU_HPP__
+
+#include "opencv2/core/gpumat.hpp"
+
+namespace cv { namespace gpu {
+
+//! Brute force non-local means algorithm (slow but universal)
+CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
+
+//! Fast (but approximate) version of non-local means algorithm similar to CPU function (running sums technique)
+class CV_EXPORTS FastNonLocalMeansDenoising
+{
+public:
+    //! Simple method, recommended for grayscale images (though it supports multichannel images)
+    void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
+
+    //! Processes luminance and color components separately
+    void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
+
+private:
+
+    GpuMat buffer, extended_src_buffer;
+    GpuMat lab, l, ab;
+};
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_PHOTO_GPU_HPP__ */
diff --git a/modules/gpuimgproc/perf/perf_denoising.cpp b/modules/photo/perf/perf_gpu.cpp
similarity index 82%
rename from modules/gpuimgproc/perf/perf_denoising.cpp
rename to modules/photo/perf/perf_gpu.cpp
index 1e33601d6..f33dd8199 100644
--- a/modules/gpuimgproc/perf/perf_denoising.cpp
+++ b/modules/photo/perf/perf_gpu.cpp
@@ -42,58 +42,15 @@
 
 #include "perf_precomp.hpp"
 
+#include "opencv2/photo/gpu.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
 using namespace std;
 using namespace testing;
 using namespace perf;
 
 #define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::sz720p)
 
-//////////////////////////////////////////////////////////////////////
-// BilateralFilter
-
-DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int);
-
-PERF_TEST_P(Sz_Depth_Cn_KernelSz, Denoising_BilateralFilter,
-            Combine(GPU_DENOISING_IMAGE_SIZES,
-                    Values(CV_8U, CV_32F),
-                    GPU_CHANNELS_1_3,
-                    Values(3, 5, 9)))
-{
-    declare.time(60.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int kernel_size = GET_PARAM(3);
-
-    const float sigma_color = 7;
-    const float sigma_spatial = 5;
-    const int borderMode = cv::BORDER_REFLECT101;
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // nonLocalMeans
 
diff --git a/modules/gpuimgproc/src/cuda/nlm.cu b/modules/photo/src/cuda/nlm.cu
similarity index 100%
rename from modules/gpuimgproc/src/cuda/nlm.cu
rename to modules/photo/src/cuda/nlm.cu
diff --git a/modules/gpuimgproc/src/denoising.cpp b/modules/photo/src/denoising_gpu.cpp
similarity index 72%
rename from modules/gpuimgproc/src/denoising.cpp
rename to modules/photo/src/denoising_gpu.cpp
index 1687f8e3c..21647315c 100644
--- a/modules/gpuimgproc/src/denoising.cpp
+++ b/modules/photo/src/denoising_gpu.cpp
@@ -42,18 +42,20 @@
 
 #include "precomp.hpp"
 
+#include "opencv2/photo/gpu.hpp"
+#include "opencv2/core/gpu_private.hpp"
+#include "opencv2/gpuarithm.hpp"
+#include "opencv2/gpuimgproc.hpp"
+
 using namespace cv;
 using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::bilateralFilter(const GpuMat&, GpuMat&, int, float, float, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); }
-
 void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); }
 
-
 #else
 
 //////////////////////////////////////////////////////////////////////////////////
@@ -63,50 +65,11 @@ namespace cv { namespace gpu { namespace cudev
 {
     namespace imgproc
     {
-        template<typename T>
-        void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t stream);
-
         template<typename T>
         void nlm_bruteforce_gpu(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);
     }
 }}}
 
-void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& s)
-{
-    using cv::gpu::cudev::imgproc::bilateral_filter_gpu;
-
-    typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s);
-
-    static const func_t funcs[6][4] =
-    {
-        {bilateral_filter_gpu<uchar>      , 0 /*bilateral_filter_gpu<uchar2>*/ , bilateral_filter_gpu<uchar3>      , bilateral_filter_gpu<uchar4>      },
-        {0 /*bilateral_filter_gpu<schar>*/, 0 /*bilateral_filter_gpu<schar2>*/ , 0 /*bilateral_filter_gpu<schar3>*/, 0 /*bilateral_filter_gpu<schar4>*/},
-        {bilateral_filter_gpu<ushort>     , 0 /*bilateral_filter_gpu<ushort2>*/, bilateral_filter_gpu<ushort3>     , bilateral_filter_gpu<ushort4>     },
-        {bilateral_filter_gpu<short>      , 0 /*bilateral_filter_gpu<short2>*/ , bilateral_filter_gpu<short3>      , bilateral_filter_gpu<short4>      },
-        {0 /*bilateral_filter_gpu<int>*/  , 0 /*bilateral_filter_gpu<int2>*/   , 0 /*bilateral_filter_gpu<int3>*/  , 0 /*bilateral_filter_gpu<int4>*/  },
-        {bilateral_filter_gpu<float>      , 0 /*bilateral_filter_gpu<float2>*/ , bilateral_filter_gpu<float3>      , bilateral_filter_gpu<float4>      }
-    };
-
-    sigma_color = (sigma_color <= 0 ) ? 1 : sigma_color;
-    sigma_spatial = (sigma_spatial <= 0 ) ? 1 : sigma_spatial;
-
-
-    int radius = (kernel_size <= 0) ? cvRound(sigma_spatial*1.5) : kernel_size/2;
-    kernel_size = std::max(radius, 1)*2 + 1;
-
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    const func_t func = funcs[src.depth()][src.channels() - 1];
-    CV_Assert(func != 0);
-
-    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
-
-    int gpuBorderType;
-    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
-
-    dst.create(src.size(), src.type());
-    func(src, dst, kernel_size, sigma_spatial, sigma_color, gpuBorderType, StreamAccessor::getStream(s));
-}
-
 void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s)
 {
     using cv::gpu::cudev::imgproc::nlm_bruteforce_gpu;
@@ -129,11 +92,6 @@ void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_
     func(src, dst, search_window/2, block_window/2, h, gpuBorderType, StreamAccessor::getStream(s));
 }
 
-
-//////////////////////////////////////////////////////////////////////////////////
-//// Non Local Means Denosing (fast approxinate)
-
-
 namespace cv { namespace gpu { namespace cudev
 {
     namespace imgproc
diff --git a/modules/gpuimgproc/test/test_denoising.cpp b/modules/photo/test/test_denoising_gpu.cpp
similarity index 85%
rename from modules/gpuimgproc/test/test_denoising.cpp
rename to modules/photo/test/test_denoising_gpu.cpp
index cb4ea1ecc..f46a49fc1 100644
--- a/modules/gpuimgproc/test/test_denoising.cpp
+++ b/modules/photo/test/test_denoising_gpu.cpp
@@ -42,26 +42,17 @@
 
 #include "test_precomp.hpp"
 
+#include "opencv2/photo/gpu.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
 #ifdef HAVE_CUDA
 
 using namespace cvtest;
 
-
 ////////////////////////////////////////////////////////
 // Brute Force Non local means
 
-struct BruteForceNonLocalMeans: testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(BruteForceNonLocalMeans, Regression)
+TEST(BruteForceNonLocalMeans, Regression)
 {
     using cv::gpu::GpuMat;
 
@@ -88,23 +79,10 @@ GPU_TEST_P(BruteForceNonLocalMeans, Regression)
     EXPECT_MAT_NEAR(gray_gold, dgray, 1e-4);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Denoising, BruteForceNonLocalMeans, ALL_DEVICES);
-
 ////////////////////////////////////////////////////////
 // Fast Force Non local means
 
-struct FastNonLocalMeans: testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(FastNonLocalMeans, Regression)
+TEST(FastNonLocalMeans, Regression)
 {
     using cv::gpu::GpuMat;
 
@@ -133,6 +111,4 @@ GPU_TEST_P(FastNonLocalMeans, Regression)
     EXPECT_MAT_NEAR(gray_gold, dgray, 1);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Denoising, FastNonLocalMeans, ALL_DEVICES);
-
 #endif // HAVE_CUDA

From cad9518928f673ffaf5adb4e7cd1067be96c485d Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 18:16:34 +0400
Subject: [PATCH 28/49] moved image labeling to gpu module

---
 modules/gpu/include/opencv2/gpu.hpp              | 16 ++++++++++++++++
 .../{gpuimgproc => gpu}/perf/perf_labeling.cpp   |  0
 .../{gpuimgproc => gpu}/src/cuda/ccomponetns.cu  |  0
 modules/{gpuimgproc => gpu}/src/graphcuts.cpp    |  0
 .../{gpuimgproc => gpu}/test/test_labeling.cpp   |  0
 .../gpuimgproc/include/opencv2/gpuimgproc.hpp    | 16 ----------------
 6 files changed, 16 insertions(+), 16 deletions(-)
 rename modules/{gpuimgproc => gpu}/perf/perf_labeling.cpp (100%)
 rename modules/{gpuimgproc => gpu}/src/cuda/ccomponetns.cu (100%)
 rename modules/{gpuimgproc => gpu}/src/graphcuts.cpp (100%)
 rename modules/{gpuimgproc => gpu}/test/test_labeling.cpp (100%)

diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 10fbbd7d8..0466ac472 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -55,6 +55,22 @@
 
 namespace cv { namespace gpu {
 
+//!performs labeling via graph cuts of a 2D regular 4-connected graph.
+CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
+                         GpuMat& buf, Stream& stream = Stream::Null());
+
+//!performs labeling via graph cuts of a 2D regular 8-connected graph.
+CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
+                         GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight,
+                         GpuMat& labels,
+                         GpuMat& buf, Stream& stream = Stream::Null());
+
+//! compute mask for Generalized Flood fill components labeling.
+CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null());
+
+//! performs connected components labeling.
+CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
+
 //! removes points (CV_32FC2, single row matrix) with zero mask value
 CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
 
diff --git a/modules/gpuimgproc/perf/perf_labeling.cpp b/modules/gpu/perf/perf_labeling.cpp
similarity index 100%
rename from modules/gpuimgproc/perf/perf_labeling.cpp
rename to modules/gpu/perf/perf_labeling.cpp
diff --git a/modules/gpuimgproc/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu
similarity index 100%
rename from modules/gpuimgproc/src/cuda/ccomponetns.cu
rename to modules/gpu/src/cuda/ccomponetns.cu
diff --git a/modules/gpuimgproc/src/graphcuts.cpp b/modules/gpu/src/graphcuts.cpp
similarity index 100%
rename from modules/gpuimgproc/src/graphcuts.cpp
rename to modules/gpu/src/graphcuts.cpp
diff --git a/modules/gpuimgproc/test/test_labeling.cpp b/modules/gpu/test/test_labeling.cpp
similarity index 100%
rename from modules/gpuimgproc/test/test_labeling.cpp
rename to modules/gpu/test/test_labeling.cpp
diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
index 8d1533446..5bfaa3b5c 100644
--- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
@@ -223,22 +223,6 @@ private:
     CannyBuf cannyBuf_;
 };
 
-//!performs labeling via graph cuts of a 2D regular 4-connected graph.
-CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
-                         GpuMat& buf, Stream& stream = Stream::Null());
-
-//!performs labeling via graph cuts of a 2D regular 8-connected graph.
-CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
-                         GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight,
-                         GpuMat& labels,
-                         GpuMat& buf, Stream& stream = Stream::Null());
-
-//! compute mask for Generalized Flood fill componetns labeling.
-CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null());
-
-//! performs connected componnents labeling.
-CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
-
 //! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
 CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel);
 //! Calculates histogram with evenly distributed bins for signle channel source.

From 28b1caa73055f075ebfed6f1adf2b6d1d7e2d50a Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:30:04 +0400
Subject: [PATCH 29/49] gpustereo module for stereo correspondence

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/doc/calib3d.rst                   |  36 +++
 modules/gpu/include/opencv2/gpu.hpp           |  14 +-
 modules/gpu/perf/perf_calib3d.cpp             | 135 ++++++++++
 modules/gpu/perf/perf_precomp.hpp             |   1 +
 modules/{gpucalib3d => gpu}/src/calib3d.cpp   |  64 -----
 .../{gpucalib3d => gpu}/src/cuda/calib3d.cu   | 183 --------------
 modules/gpu/src/precomp.hpp                   |   1 +
 .../{gpucalib3d => gpu}/test/test_calib3d.cpp | 158 ------------
 modules/gpu/test/test_precomp.hpp             |   1 +
 modules/gpucalib3d/CMakeLists.txt             |   9 -
 modules/gpucalib3d/doc/gpucalib3d.rst         |   8 -
 modules/gpustereo/CMakeLists.txt              |   9 +
 modules/gpustereo/doc/gpustereo.rst           |   8 +
 .../doc/stereo.rst}                           |  36 +--
 .../include/opencv2/gpustereo.hpp}            |  22 +-
 .../perf/perf_main.cpp                        |   2 +-
 .../perf/perf_precomp.cpp                     |   0
 .../perf/perf_precomp.hpp                     |   3 +-
 .../perf/perf_stereo.cpp}                     | 129 +---------
 .../src/cuda/disparity_bilateral_filter.cu}   |   0
 .../src/cuda/stereobm.cu                      |   0
 .../src/cuda/stereobp.cu                      |   0
 .../src/cuda/stereocsbp.cu                    |   0
 modules/gpustereo/src/cuda/util.cu            | 235 ++++++++++++++++++
 .../src/disparity_bilateral_filter.cpp        |   0
 .../{gpucalib3d => gpustereo}/src/precomp.cpp |   0
 .../{gpucalib3d => gpustereo}/src/precomp.hpp |   6 +-
 .../src/stereobm.cpp                          |   0
 .../src/stereobp.cpp                          |   0
 .../src/stereocsbp.cpp                        |   2 +-
 modules/gpustereo/src/util.cpp                | 117 +++++++++
 .../test/test_main.cpp                        |   0
 .../test/test_precomp.cpp                     |   0
 .../test/test_precomp.hpp                     |   3 +-
 modules/gpustereo/test/test_stereo.cpp        | 207 +++++++++++++++
 samples/cpp/CMakeLists.txt                    |   2 +-
 samples/gpu/CMakeLists.txt                    |   2 +-
 38 files changed, 786 insertions(+), 609 deletions(-)
 create mode 100644 modules/gpu/doc/calib3d.rst
 create mode 100644 modules/gpu/perf/perf_calib3d.cpp
 rename modules/{gpucalib3d => gpu}/src/calib3d.cpp (82%)
 rename modules/{gpucalib3d => gpu}/src/cuda/calib3d.cu (58%)
 rename modules/{gpucalib3d => gpu}/test/test_calib3d.cpp (61%)
 delete mode 100644 modules/gpucalib3d/CMakeLists.txt
 delete mode 100644 modules/gpucalib3d/doc/gpucalib3d.rst
 create mode 100644 modules/gpustereo/CMakeLists.txt
 create mode 100644 modules/gpustereo/doc/gpustereo.rst
 rename modules/{gpucalib3d/doc/camera_calibration_and_3d_reconstruction.rst => gpustereo/doc/stereo.rst} (92%)
 rename modules/{gpucalib3d/include/opencv2/gpucalib3d.hpp => gpustereo/include/opencv2/gpustereo.hpp} (91%)
 rename modules/{gpucalib3d => gpustereo}/perf/perf_main.cpp (97%)
 rename modules/{gpucalib3d => gpustereo}/perf/perf_precomp.cpp (100%)
 rename modules/{gpucalib3d => gpustereo}/perf/perf_precomp.hpp (98%)
 rename modules/{gpucalib3d/perf/perf_calib3d.cpp => gpustereo/perf/perf_stereo.cpp} (68%)
 rename modules/{gpucalib3d/src/cuda/disp_bilateral_filter.cu => gpustereo/src/cuda/disparity_bilateral_filter.cu} (100%)
 rename modules/{gpucalib3d => gpustereo}/src/cuda/stereobm.cu (100%)
 rename modules/{gpucalib3d => gpustereo}/src/cuda/stereobp.cu (100%)
 rename modules/{gpucalib3d => gpustereo}/src/cuda/stereocsbp.cu (100%)
 create mode 100644 modules/gpustereo/src/cuda/util.cu
 rename modules/{gpucalib3d => gpustereo}/src/disparity_bilateral_filter.cpp (100%)
 rename modules/{gpucalib3d => gpustereo}/src/precomp.cpp (100%)
 rename modules/{gpucalib3d => gpustereo}/src/precomp.hpp (94%)
 rename modules/{gpucalib3d => gpustereo}/src/stereobm.cpp (100%)
 rename modules/{gpucalib3d => gpustereo}/src/stereobp.cpp (100%)
 rename modules/{gpucalib3d => gpustereo}/src/stereocsbp.cpp (99%)
 create mode 100644 modules/gpustereo/src/util.cpp
 rename modules/{gpucalib3d => gpustereo}/test/test_main.cpp (100%)
 rename modules/{gpucalib3d => gpustereo}/test/test_precomp.cpp (100%)
 rename modules/{gpucalib3d => gpustereo}/test/test_precomp.hpp (98%)
 create mode 100644 modules/gpustereo/test/test_stereo.cpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 578957037..dec43263a 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -7,7 +7,7 @@ set(the_description "GPU-accelerated Computer Vision")
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
 ocv_define_module(gpu opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
-                      opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d opencv_gpuobjdetect)
+                      opencv_gpufeatures2d opencv_gpuvideo opencv_gpustereo opencv_gpuobjdetect)
 
 if(HAVE_CUDA)
   add_subdirectory(perf4au)
diff --git a/modules/gpu/doc/calib3d.rst b/modules/gpu/doc/calib3d.rst
new file mode 100644
index 000000000..faa6c0fec
--- /dev/null
+++ b/modules/gpu/doc/calib3d.rst
@@ -0,0 +1,36 @@
+Camera Calibration and 3D Reconstruction
+========================================
+
+.. highlight:: cpp
+
+
+
+gpu::solvePnPRansac
+-------------------
+Finds the object pose from 3D-2D point correspondences.
+
+.. ocv:function:: void gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat, const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false, int num_iters=100, float max_dist=8.0, int min_inlier_count=100, vector<int>* inliers=NULL)
+
+    :param object: Single-row matrix of object points.
+
+    :param image: Single-row matrix of image points.
+
+    :param camera_mat: 3x3 matrix of intrinsic camera parameters.
+
+    :param dist_coef: Distortion coefficients. See :ocv:func:`undistortPoints` for details.
+
+    :param rvec: Output 3D rotation vector.
+
+    :param tvec: Output 3D translation vector.
+
+    :param use_extrinsic_guess: Flag to indicate that the function must use ``rvec`` and ``tvec`` as an initial transformation guess. It is not supported for now.
+
+    :param num_iters: Maximum number of RANSAC iterations.
+
+    :param max_dist: Euclidean distance threshold used to decide whether a point is an inlier.
+
+    :param min_inlier_count: Inlier count threshold: the function must stop once at least this many inliers are found. It is not supported for now.
+
+    :param inliers: Output vector of inlier indices.
+
+.. seealso:: :ocv:func:`solvePnPRansac`
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 0466ac472..72b3f49a2 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -50,7 +50,7 @@
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpufeatures2d.hpp"
 #include "opencv2/gpuvideo.hpp"
-#include "opencv2/gpucalib3d.hpp"
+#include "opencv2/gpustereo.hpp"
 #include "opencv2/gpuobjdetect.hpp"
 
 namespace cv { namespace gpu {
@@ -71,6 +71,18 @@ CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Sc
 //! performs connected componnents labeling.
 CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
 
+CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
+                                GpuMat& dst, Stream& stream = Stream::Null());
+
+CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
+                              const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
+                              Stream& stream = Stream::Null());
+
+CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
+                               const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
+                               int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
+                               std::vector<int>* inliers=NULL);
+
 //! removes points (CV_32FC2, single row matrix) with zero mask value
 CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
 
diff --git a/modules/gpu/perf/perf_calib3d.cpp b/modules/gpu/perf/perf_calib3d.cpp
new file mode 100644
index 000000000..185d9cd68
--- /dev/null
+++ b/modules/gpu/perf/perf_calib3d.cpp
@@ -0,0 +1,135 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+DEF_PARAM_TEST_1(Count, int);
+
+//////////////////////////////////////////////////////////////////////
+// ProjectPoints
+
+PERF_TEST_P(Count, Calib3D_ProjectPoints,
+            Values(5000, 10000, 20000))
+{
+    const int count = GetParam();
+
+    cv::Mat src(1, count, CV_32FC3);
+    declare.in(src, WARMUP_RNG);
+
+    const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
+    const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
+    const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SolvePnPRansac
+
+PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
+            Values(5000, 10000, 20000))
+{
+    declare.time(10.0);
+
+    const int count = GetParam();
+
+    cv::Mat object(1, count, CV_32FC3);
+    declare.in(object, WARMUP_RNG);
+
+    cv::Mat camera_mat(3, 3, CV_32FC1);
+    cv::randu(camera_mat, 0.5, 1);
+    camera_mat.at<float>(0, 1) = 0.f;
+    camera_mat.at<float>(1, 0) = 0.f;
+    camera_mat.at<float>(2, 0) = 0.f;
+    camera_mat.at<float>(2, 1) = 0.f;
+
+    const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
+
+    cv::Mat rvec_gold(1, 3, CV_32FC1);
+    cv::randu(rvec_gold, 0, 1);
+
+    cv::Mat tvec_gold(1, 3, CV_32FC1);
+    cv::randu(tvec_gold, 0, 1);
+
+    std::vector<cv::Point2f> image_vec;
+    cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
+
+    const cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
+
+    cv::Mat rvec;
+    cv::Mat tvec;
+
+    if (PERF_RUN_GPU())
+    {
+        TEST_CYCLE() cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
+
+        GPU_SANITY_CHECK(rvec, 1e-3);
+        GPU_SANITY_CHECK(tvec, 1e-3);
+    }
+    else
+    {
+        TEST_CYCLE() cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
+
+        CPU_SANITY_CHECK(rvec, 1e-6);
+        CPU_SANITY_CHECK(tvec, 1e-6);
+    }
+}
diff --git a/modules/gpu/perf/perf_precomp.hpp b/modules/gpu/perf/perf_precomp.hpp
index 9c75581d9..0329b3a98 100644
--- a/modules/gpu/perf/perf_precomp.hpp
+++ b/modules/gpu/perf/perf_precomp.hpp
@@ -55,6 +55,7 @@
 #include "opencv2/ts/gpu_perf.hpp"
 
 #include "opencv2/gpu.hpp"
+#include "opencv2/calib3d.hpp"
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
diff --git a/modules/gpucalib3d/src/calib3d.cpp b/modules/gpu/src/calib3d.cpp
similarity index 82%
rename from modules/gpucalib3d/src/calib3d.cpp
rename to modules/gpu/src/calib3d.cpp
index 135859094..cb3b1464b 100644
--- a/modules/gpucalib3d/src/calib3d.cpp
+++ b/modules/gpu/src/calib3d.cpp
@@ -50,8 +50,6 @@ using namespace cv::gpu;
 void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, std::vector<int>*) { throw_no_cuda(); }
-void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -289,66 +287,4 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
     }
 }
 
-////////////////////////////////////////////////////////////////////////
-// reprojectImageTo3D
-
-namespace cv { namespace gpu { namespace cudev
-{
-    template <typename T, typename D>
-    void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-}}}
-
-void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q, int dst_cn, Stream& stream)
-{
-    using namespace cv::gpu::cudev;
-
-    typedef void (*func_t)(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-    static const func_t funcs[2][4] =
-    {
-        {reprojectImageTo3D_gpu<uchar, float3>, 0, 0, reprojectImageTo3D_gpu<short, float3>},
-        {reprojectImageTo3D_gpu<uchar, float4>, 0, 0, reprojectImageTo3D_gpu<short, float4>}
-    };
-
-    CV_Assert(disp.type() == CV_8U || disp.type() == CV_16S);
-    CV_Assert(Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4 && Q.isContinuous());
-    CV_Assert(dst_cn == 3 || dst_cn == 4);
-
-    xyz.create(disp.size(), CV_MAKE_TYPE(CV_32F, dst_cn));
-
-    funcs[dst_cn == 4][disp.type()](disp, xyz, Q.ptr<float>(), StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// drawColorDisp
-
-namespace cv { namespace gpu { namespace cudev
-{
-    void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
-    void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
-}}}
-
-namespace
-{
-    template <typename T>
-    void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
-    {
-        using namespace ::cv::gpu::cudev;
-
-        dst.create(src.size(), CV_8UC4);
-
-        drawColorDisp_gpu((PtrStepSz<T>)src, dst, ndisp, stream);
-    }
-
-    typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);
-
-    const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};
-}
-
-void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)
-{
-    CV_Assert(src.type() == CV_8U || src.type() == CV_16S);
-
-    drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));
-}
-
 #endif
diff --git a/modules/gpucalib3d/src/cuda/calib3d.cu b/modules/gpu/src/cuda/calib3d.cu
similarity index 58%
rename from modules/gpucalib3d/src/cuda/calib3d.cu
rename to modules/gpu/src/cuda/calib3d.cu
index d1d59ce23..6085e716d 100644
--- a/modules/gpucalib3d/src/cuda/calib3d.cu
+++ b/modules/gpu/src/cuda/calib3d.cu
@@ -187,189 +187,6 @@ namespace cv { namespace gpu { namespace cudev
             cudaSafeCall( cudaDeviceSynchronize() );
         }
     } // namespace solvepnp_ransac
-
-
-
-    /////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
-
-    __constant__ float cq[16];
-
-    template <typename T, typename D>
-    __global__ void reprojectImageTo3D(const PtrStepSz<T> disp, PtrStep<D> xyz)
-    {
-        const int x = blockIdx.x * blockDim.x + threadIdx.x;
-        const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-        if (y >= disp.rows || x >= disp.cols)
-            return;
-
-        const float qx = x * cq[ 0] + y * cq[ 1] + cq[ 3];
-        const float qy = x * cq[ 4] + y * cq[ 5] + cq[ 7];
-        const float qz = x * cq[ 8] + y * cq[ 9] + cq[11];
-        const float qw = x * cq[12] + y * cq[13] + cq[15];
-
-        const T d = disp(y, x);
-
-        const float iW = 1.f / (qw + cq[14] * d);
-
-        D v = VecTraits<D>::all(1.0f);
-        v.x = (qx + cq[2] * d) * iW;
-        v.y = (qy + cq[6] * d) * iW;
-        v.z = (qz + cq[10] * d) * iW;
-
-        xyz(y, x) = v;
-    }
-
-    template <typename T, typename D>
-    void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream)
-    {
-        dim3 block(32, 8);
-        dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y));
-
-        cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
-
-        reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz);
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-
-    template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-    template void reprojectImageTo3D_gpu<uchar, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-    template void reprojectImageTo3D_gpu<short, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-    template void reprojectImageTo3D_gpu<short, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
-
-    /////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
-
-    template <typename T>
-    __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
-    {
-        unsigned int H = ((ndisp-d) * 240)/ndisp;
-
-        unsigned int hi = (H/60) % 6;
-        float f = H/60.f - H/60;
-        float p = V * (1 - S);
-        float q = V * (1 - f * S);
-        float t = V * (1 - (1 - f) * S);
-
-        float3 res;
-
-        if (hi == 0) //R = V,	G = t,	B = p
-        {
-            res.x = p;
-            res.y = t;
-            res.z = V;
-        }
-
-        if (hi == 1) // R = q,	G = V,	B = p
-        {
-            res.x = p;
-            res.y = V;
-            res.z = q;
-        }
-
-        if (hi == 2) // R = p,	G = V,	B = t
-        {
-            res.x = t;
-            res.y = V;
-            res.z = p;
-        }
-
-        if (hi == 3) // R = p,	G = q,	B = V
-        {
-            res.x = V;
-            res.y = q;
-            res.z = p;
-        }
-
-        if (hi == 4) // R = t,	G = p,	B = V
-        {
-            res.x = V;
-            res.y = p;
-            res.z = t;
-        }
-
-        if (hi == 5) // R = V,	G = p,	B = q
-        {
-            res.x = q;
-            res.y = p;
-            res.z = V;
-        }
-        const unsigned int b = (unsigned int)(::max(0.f, ::min(res.x, 1.f)) * 255.f);
-        const unsigned int g = (unsigned int)(::max(0.f, ::min(res.y, 1.f)) * 255.f);
-        const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f);
-        const unsigned int a = 255U;
-
-        return (a << 24) + (r << 16) + (g << 8) + b;
-    }
-
-    __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
-    {
-        const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
-        const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-        if(x < width && y < height)
-        {
-            uchar4 d4 = *(uchar4*)(disp + y * disp_step + x);
-
-            uint4 res;
-            res.x = cvtPixel(d4.x, ndisp);
-            res.y = cvtPixel(d4.y, ndisp);
-            res.z = cvtPixel(d4.z, ndisp);
-            res.w = cvtPixel(d4.w, ndisp);
-
-            uint4* line = (uint4*)(out_image + y * out_step);
-            line[x >> 2] = res;
-        }
-    }
-
-    __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
-    {
-        const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
-        const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-        if(x < width && y < height)
-        {
-            short2 d2 = *(short2*)(disp + y * disp_step + x);
-
-            uint2 res;
-            res.x = cvtPixel(d2.x, ndisp);
-            res.y = cvtPixel(d2.y, ndisp);
-
-            uint2* line = (uint2*)(out_image + y * out_step);
-            line[x >> 1] = res;
-        }
-    }
-
-
-    void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
-    {
-        dim3 threads(16, 16, 1);
-        dim3 grid(1, 1, 1);
-        grid.x = divUp(src.cols, threads.x << 2);
-        grid.y = divUp(src.rows, threads.y);
-
-        drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-
-    void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
-    {
-        dim3 threads(32, 8, 1);
-        dim3 grid(1, 1, 1);
-        grid.x = divUp(src.cols, threads.x << 1);
-        grid.y = divUp(src.rows, threads.y);
-
-        drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp);
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
 }}} // namespace cv { namespace gpu { namespace cudev
 
 
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index 1b5207b38..c662cb1d7 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -44,6 +44,7 @@
 #define __OPENCV_PRECOMP_H__
 
 #include "opencv2/gpu.hpp"
+#include "opencv2/calib3d.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
 
diff --git a/modules/gpucalib3d/test/test_calib3d.cpp b/modules/gpu/test/test_calib3d.cpp
similarity index 61%
rename from modules/gpucalib3d/test/test_calib3d.cpp
rename to modules/gpu/test/test_calib3d.cpp
index 5de3d34df..3ad19dcbe 100644
--- a/modules/gpucalib3d/test/test_calib3d.cpp
+++ b/modules/gpu/test/test_calib3d.cpp
@@ -46,123 +46,6 @@
 
 using namespace cvtest;
 
-//////////////////////////////////////////////////////////////////////////
-// StereoBM
-
-struct StereoBM : testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(StereoBM, Regression)
-{
-    cv::Mat left_image  = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
-    cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
-    cv::Mat disp_gold   = readImage("stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
-
-    ASSERT_FALSE(left_image.empty());
-    ASSERT_FALSE(right_image.empty());
-    ASSERT_FALSE(disp_gold.empty());
-
-    cv::gpu::StereoBM_GPU bm(0, 128, 19);
-    cv::gpu::GpuMat disp;
-
-    bm(loadMat(left_image), loadMat(right_image), disp);
-
-    EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoBM, ALL_DEVICES);
-
-//////////////////////////////////////////////////////////////////////////
-// StereoBeliefPropagation
-
-struct StereoBeliefPropagation : testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(StereoBeliefPropagation, Regression)
-{
-    cv::Mat left_image  = readImage("stereobp/aloe-L.png");
-    cv::Mat right_image = readImage("stereobp/aloe-R.png");
-    cv::Mat disp_gold   = readImage("stereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
-
-    ASSERT_FALSE(left_image.empty());
-    ASSERT_FALSE(right_image.empty());
-    ASSERT_FALSE(disp_gold.empty());
-
-    cv::gpu::StereoBeliefPropagation bp(64, 8, 2, 25, 0.1f, 15, 1, CV_16S);
-    cv::gpu::GpuMat disp;
-
-    bp(loadMat(left_image), loadMat(right_image), disp);
-
-    cv::Mat h_disp(disp);
-    h_disp.convertTo(h_disp, disp_gold.depth());
-
-    EXPECT_MAT_NEAR(disp_gold, h_disp, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoBeliefPropagation, ALL_DEVICES);
-
-//////////////////////////////////////////////////////////////////////////
-// StereoConstantSpaceBP
-
-struct StereoConstantSpaceBP : testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(StereoConstantSpaceBP, Regression)
-{
-    cv::Mat left_image  = readImage("csstereobp/aloe-L.png");
-    cv::Mat right_image = readImage("csstereobp/aloe-R.png");
-
-    cv::Mat disp_gold;
-
-    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
-        disp_gold = readImage("csstereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
-    else
-        disp_gold = readImage("csstereobp/aloe-disp_CC1X.png", cv::IMREAD_GRAYSCALE);
-
-    ASSERT_FALSE(left_image.empty());
-    ASSERT_FALSE(right_image.empty());
-    ASSERT_FALSE(disp_gold.empty());
-
-    cv::gpu::StereoConstantSpaceBP csbp(128, 16, 4, 4);
-    cv::gpu::GpuMat disp;
-
-    csbp(loadMat(left_image), loadMat(right_image), disp);
-
-    cv::Mat h_disp(disp);
-    h_disp.convertTo(h_disp, disp_gold.depth());
-
-    EXPECT_MAT_NEAR(disp_gold, h_disp, 1.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // transformPoints
 
@@ -304,45 +187,4 @@ GPU_TEST_P(SolvePnPRansac, Accuracy)
 
 INSTANTIATE_TEST_CASE_P(GPU_Calib3D, SolvePnPRansac, ALL_DEVICES);
 
-////////////////////////////////////////////////////////////////////////////////
-// reprojectImageTo3D
-
-PARAM_TEST_CASE(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(ReprojectImageTo3D, Accuracy)
-{
-    cv::Mat disp = randomMat(size, depth, 5.0, 30.0);
-    cv::Mat Q = randomMat(cv::Size(4, 4), CV_32FC1, 0.1, 1.0);
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::reprojectImageTo3D(loadMat(disp, useRoi), dst, Q, 3);
-
-    cv::Mat dst_gold;
-    cv::reprojectImageTo3D(disp, dst_gold, Q, false);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ReprojectImageTo3D, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U), MatDepth(CV_16S)),
-    WHOLE_SUBMAT));
-
 #endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp
index 1e4248101..3177e88e7 100644
--- a/modules/gpu/test/test_precomp.hpp
+++ b/modules/gpu/test/test_precomp.hpp
@@ -57,5 +57,6 @@
 #include "opencv2/core.hpp"
 #include "opencv2/core/opengl.hpp"
 #include "opencv2/gpu.hpp"
+#include "opencv2/calib3d.hpp"
 
 #endif
diff --git a/modules/gpucalib3d/CMakeLists.txt b/modules/gpucalib3d/CMakeLists.txt
deleted file mode 100644
index bb949c4d3..000000000
--- a/modules/gpucalib3d/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-if(ANDROID OR IOS)
-  ocv_module_disable(gpucalib3d)
-endif()
-
-set(the_description "GPU-accelerated Camera Calibration and 3D Reconstruction")
-
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
-
-ocv_define_module(gpucalib3d opencv_calib3d opencv_gpuarithm)
diff --git a/modules/gpucalib3d/doc/gpucalib3d.rst b/modules/gpucalib3d/doc/gpucalib3d.rst
deleted file mode 100644
index 5dffaa048..000000000
--- a/modules/gpucalib3d/doc/gpucalib3d.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-*************************************************************
-gpu. GPU-accelerated Camera Calibration and 3D Reconstruction
-*************************************************************
-
-.. toctree::
-    :maxdepth: 1
-
-    camera_calibration_and_3d_reconstruction
diff --git a/modules/gpustereo/CMakeLists.txt b/modules/gpustereo/CMakeLists.txt
new file mode 100644
index 000000000..c2a302c8a
--- /dev/null
+++ b/modules/gpustereo/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpustereo)  # taken only for Android/iOS builds; presumably excludes the module there
+endif()
+
+set(the_description "GPU-accelerated Stereo Correspondence")
+# Flags handed to ocv_warnings_disable: MSVC /wd4127 /wd4324 /wd4512 plus GCC-style -Wundef and -Wmissing-declarations.
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)
+# Presumably declares the gpustereo module with opencv_calib3d as its dependency -- confirm against the ocv_define_module definition.
+ocv_define_module(gpustereo opencv_calib3d)
diff --git a/modules/gpustereo/doc/gpustereo.rst b/modules/gpustereo/doc/gpustereo.rst
new file mode 100644
index 000000000..d3076794c
--- /dev/null
+++ b/modules/gpustereo/doc/gpustereo.rst
@@ -0,0 +1,8 @@
+************************************************
+gpustereo. GPU-accelerated Stereo Correspondence
+************************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    stereo
diff --git a/modules/gpucalib3d/doc/camera_calibration_and_3d_reconstruction.rst b/modules/gpustereo/doc/stereo.rst
similarity index 92%
rename from modules/gpucalib3d/doc/camera_calibration_and_3d_reconstruction.rst
rename to modules/gpustereo/doc/stereo.rst
index 587c253d2..cd2add0b9 100644
--- a/modules/gpucalib3d/doc/camera_calibration_and_3d_reconstruction.rst
+++ b/modules/gpustereo/doc/stereo.rst
@@ -1,5 +1,5 @@
-Camera Calibration and 3D Reconstruction
-========================================
+Stereo Correspondence
+=====================
 
 .. highlight:: cpp
 
@@ -462,38 +462,6 @@ Reprojects a disparity image to 3D space.
 
 
 
-gpu::solvePnPRansac
--------------------
-Finds the object pose from 3D-2D point correspondences.
-
-.. ocv:function:: void gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat, const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false, int num_iters=100, float max_dist=8.0, int min_inlier_count=100, vector<int>* inliers=NULL)
-
-    :param object: Single-row matrix of object points.
-
-    :param image: Single-row matrix of image points.
-
-    :param camera_mat: 3x3 matrix of intrinsic camera parameters.
-
-    :param dist_coef: Distortion coefficients. See :ocv:func:`undistortPoints` for details.
-
-    :param rvec: Output 3D rotation vector.
-
-    :param tvec: Output 3D translation vector.
-
-    :param use_extrinsic_guess: Flag to indicate that the function must use ``rvec`` and ``tvec`` as an initial transformation guess. It is not supported for now.
-
-    :param num_iters: Maximum number of RANSAC iterations.
-
-    :param max_dist: Euclidean distance threshold to detect whether point is inlier or not.
-
-    :param min_inlier_count: Flag to indicate that the function must stop if greater or equal number of inliers is achieved. It is not supported for now.
-
-    :param inliers: Output vector of inlier indices.
-
-.. seealso:: :ocv:func:`solvePnPRansac`
-
-
-
 .. [Felzenszwalb2006] Pedro F. Felzenszwalb algorithm [Pedro F. Felzenszwalb and Daniel P. Huttenlocher. *Efficient belief propagation for early vision*. International Journal of Computer Vision, 70(1), October 2006
 
 .. [Yang2010] Q. Yang, L. Wang, and N. Ahuja. *A constant-space belief propagation algorithm for stereo matching*. In CVPR, 2010.
diff --git a/modules/gpucalib3d/include/opencv2/gpucalib3d.hpp b/modules/gpustereo/include/opencv2/gpustereo.hpp
similarity index 91%
rename from modules/gpucalib3d/include/opencv2/gpucalib3d.hpp
rename to modules/gpustereo/include/opencv2/gpustereo.hpp
index 3496d987b..54ce82d08 100644
--- a/modules/gpucalib3d/include/opencv2/gpucalib3d.hpp
+++ b/modules/gpustereo/include/opencv2/gpustereo.hpp
@@ -40,8 +40,12 @@
 //
 //M*/
 
-#ifndef __OPENCV_GPUCALIB3D_HPP__
-#define __OPENCV_GPUCALIB3D_HPP__
+#ifndef __OPENCV_GPUSTEREO_HPP__
+#define __OPENCV_GPUSTEREO_HPP__
+
+#ifndef __cplusplus
+#  error gpustereo.hpp header must be compiled as C++
+#endif
 
 #include "opencv2/core/gpumat.hpp"
 
@@ -226,18 +230,6 @@ private:
     GpuMat table_space;
 };
 
-CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                                GpuMat& dst, Stream& stream = Stream::Null());
-
-CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
-                              const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
-                              Stream& stream = Stream::Null());
-
-CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
-                               const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
-                               int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
-                               std::vector<int>* inliers=NULL);
-
 //! Reprojects disparity image to 3D space.
 //! Supports CV_8U and CV_16S types of input disparity.
 //! The output is a 3- or 4-channel floating-point matrix.
@@ -252,4 +244,4 @@ CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndis
 
 }} // namespace cv { namespace gpu {
 
-#endif /* __OPENCV_GPUCALIB3D_HPP__ */
+#endif /* __OPENCV_GPUSTEREO_HPP__ */
diff --git a/modules/gpucalib3d/perf/perf_main.cpp b/modules/gpustereo/perf/perf_main.cpp
similarity index 97%
rename from modules/gpucalib3d/perf/perf_main.cpp
rename to modules/gpustereo/perf/perf_main.cpp
index b35791cda..d681cdb4d 100644
--- a/modules/gpucalib3d/perf/perf_main.cpp
+++ b/modules/gpustereo/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpuarithm, printCudaInfo())
+CV_PERF_TEST_MAIN(gpustereo, printCudaInfo())
diff --git a/modules/gpucalib3d/perf/perf_precomp.cpp b/modules/gpustereo/perf/perf_precomp.cpp
similarity index 100%
rename from modules/gpucalib3d/perf/perf_precomp.cpp
rename to modules/gpustereo/perf/perf_precomp.cpp
diff --git a/modules/gpucalib3d/perf/perf_precomp.hpp b/modules/gpustereo/perf/perf_precomp.hpp
similarity index 98%
rename from modules/gpucalib3d/perf/perf_precomp.hpp
rename to modules/gpustereo/perf/perf_precomp.hpp
index dc244a72a..f61de9fae 100644
--- a/modules/gpucalib3d/perf/perf_precomp.hpp
+++ b/modules/gpustereo/perf/perf_precomp.hpp
@@ -54,8 +54,7 @@
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_perf.hpp"
 
-#include "opencv2/gpucalib3d.hpp"
-
+#include "opencv2/gpustereo.hpp"
 #include "opencv2/calib3d.hpp"
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
diff --git a/modules/gpucalib3d/perf/perf_calib3d.cpp b/modules/gpustereo/perf/perf_stereo.cpp
similarity index 68%
rename from modules/gpucalib3d/perf/perf_calib3d.cpp
rename to modules/gpustereo/perf/perf_stereo.cpp
index 725f49c53..e0438c0ae 100644
--- a/modules/gpucalib3d/perf/perf_calib3d.cpp
+++ b/modules/gpustereo/perf/perf_stereo.cpp
@@ -52,7 +52,7 @@ using namespace perf;
 typedef std::tr1::tuple<string, string> pair_string;
 DEF_PARAM_TEST_1(ImagePair, pair_string);
 
-PERF_TEST_P(ImagePair, Calib3D_StereoBM,
+PERF_TEST_P(ImagePair, StereoBM,
             Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
 {
     declare.time(300.0);
@@ -93,7 +93,7 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM,
 //////////////////////////////////////////////////////////////////////
 // StereoBeliefPropagation
 
-PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
+PERF_TEST_P(ImagePair, StereoBeliefPropagation,
             Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
 {
     declare.time(300.0);
@@ -127,7 +127,7 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
 //////////////////////////////////////////////////////////////////////
 // StereoConstantSpaceBP
 
-PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
+PERF_TEST_P(ImagePair, StereoConstantSpaceBP,
             Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
 {
     declare.time(300.0);
@@ -161,7 +161,7 @@ PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
 //////////////////////////////////////////////////////////////////////
 // DisparityBilateralFilter
 
-PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter,
+PERF_TEST_P(ImagePair, DisparityBilateralFilter,
             Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
 {
     const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
@@ -190,127 +190,10 @@ PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// TransformPoints
-
-DEF_PARAM_TEST_1(Count, int);
-
-PERF_TEST_P(Count, Calib3D_TransformPoints,
-            Values(5000, 10000, 20000))
-{
-    const int count = GetParam();
-
-    cv::Mat src(1, count, CV_32FC3);
-    declare.in(src, WARMUP_RNG);
-
-    const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
-    const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::transformPoints(d_src, rvec, tvec, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// ProjectPoints
-
-PERF_TEST_P(Count, Calib3D_ProjectPoints,
-            Values(5000, 10000, 20000))
-{
-    const int count = GetParam();
-
-    cv::Mat src(1, count, CV_32FC3);
-    declare.in(src, WARMUP_RNG);
-
-    const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
-    const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
-    const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// SolvePnPRansac
-
-PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
-            Values(5000, 10000, 20000))
-{
-    declare.time(10.0);
-
-    const int count = GetParam();
-
-    cv::Mat object(1, count, CV_32FC3);
-    declare.in(object, WARMUP_RNG);
-
-    cv::Mat camera_mat(3, 3, CV_32FC1);
-    cv::randu(camera_mat, 0.5, 1);
-    camera_mat.at<float>(0, 1) = 0.f;
-    camera_mat.at<float>(1, 0) = 0.f;
-    camera_mat.at<float>(2, 0) = 0.f;
-    camera_mat.at<float>(2, 1) = 0.f;
-
-    const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
-
-    cv::Mat rvec_gold(1, 3, CV_32FC1);
-    cv::randu(rvec_gold, 0, 1);
-
-    cv::Mat tvec_gold(1, 3, CV_32FC1);
-    cv::randu(tvec_gold, 0, 1);
-
-    std::vector<cv::Point2f> image_vec;
-    cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
-
-    const cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
-
-    cv::Mat rvec;
-    cv::Mat tvec;
-
-    if (PERF_RUN_GPU())
-    {
-        TEST_CYCLE() cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
-
-        GPU_SANITY_CHECK(rvec, 1e-3);
-        GPU_SANITY_CHECK(tvec, 1e-3);
-    }
-    else
-    {
-        TEST_CYCLE() cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
-
-        CPU_SANITY_CHECK(rvec, 1e-6);
-        CPU_SANITY_CHECK(tvec, 1e-6);
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // ReprojectImageTo3D
 
-PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D,
+PERF_TEST_P(Sz_Depth, ReprojectImageTo3D,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16S)))
 {
@@ -345,7 +228,7 @@ PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D,
 //////////////////////////////////////////////////////////////////////
 // DrawColorDisp
 
-PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp,
+PERF_TEST_P(Sz_Depth, DrawColorDisp,
             Combine(GPU_TYPICAL_MAT_SIZES,
                     Values(CV_8U, CV_16S)))
 {
diff --git a/modules/gpucalib3d/src/cuda/disp_bilateral_filter.cu b/modules/gpustereo/src/cuda/disparity_bilateral_filter.cu
similarity index 100%
rename from modules/gpucalib3d/src/cuda/disp_bilateral_filter.cu
rename to modules/gpustereo/src/cuda/disparity_bilateral_filter.cu
diff --git a/modules/gpucalib3d/src/cuda/stereobm.cu b/modules/gpustereo/src/cuda/stereobm.cu
similarity index 100%
rename from modules/gpucalib3d/src/cuda/stereobm.cu
rename to modules/gpustereo/src/cuda/stereobm.cu
diff --git a/modules/gpucalib3d/src/cuda/stereobp.cu b/modules/gpustereo/src/cuda/stereobp.cu
similarity index 100%
rename from modules/gpucalib3d/src/cuda/stereobp.cu
rename to modules/gpustereo/src/cuda/stereobp.cu
diff --git a/modules/gpucalib3d/src/cuda/stereocsbp.cu b/modules/gpustereo/src/cuda/stereocsbp.cu
similarity index 100%
rename from modules/gpucalib3d/src/cuda/stereocsbp.cu
rename to modules/gpustereo/src/cuda/stereocsbp.cu
diff --git a/modules/gpustereo/src/cuda/util.cu b/modules/gpustereo/src/cuda/util.cu
new file mode 100644
index 000000000..1945d2463
--- /dev/null
+++ b/modules/gpustereo/src/cuda/util.cu
@@ -0,0 +1,235 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/transform.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/reduce.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    /////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
+
+    __constant__ float cq[16]; // the 16 floats of Q; the kernel below indexes them as a row-major 4x4 matrix
+
+    template <typename T, typename D> // T: disparity element type, D: output vector type
+    __global__ void reprojectImageTo3D(const PtrStepSz<T> disp, PtrStep<D> xyz)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x; // column of the pixel owned by this thread
+        const int y = blockIdx.y * blockDim.y + threadIdx.y; // row of the pixel owned by this thread
+
+        if (y >= disp.rows || x >= disp.cols) // thread lies outside the disparity image
+            return;
+        // Each row of Q applied to (x, y, ., 1); the disparity (third-column) term is added once d has been read.
+        const float qx = x * cq[ 0] + y * cq[ 1] + cq[ 3];
+        const float qy = x * cq[ 4] + y * cq[ 5] + cq[ 7];
+        const float qz = x * cq[ 8] + y * cq[ 9] + cq[11];
+        const float qw = x * cq[12] + y * cq[13] + cq[15];
+
+        const T d = disp(y, x);
+
+        const float iW = 1.f / (qw + cq[14] * d); // reciprocal of the fourth (homogeneous) component; NOTE(review): no guard for a zero denominator
+
+        D v = VecTraits<D>::all(1.0f); // presumably sets every component to 1; x, y and z are overwritten below
+        v.x = (qx + cq[2] * d) * iW;
+        v.y = (qy + cq[6] * d) * iW;
+        v.z = (qz + cq[10] * d) * iW;
+
+        xyz(y, x) = v;
+    }
+
+    template <typename T, typename D> // host-side launcher for the reprojectImageTo3D kernel
+    void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream)
+    {
+        dim3 block(32, 8);
+        dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y)); // divUp presumably rounds up, so the 32x8 blocks cover every pixel
+        // Copy the 16 floats at q into cq. NOTE(review): this call takes no stream argument -- confirm its ordering against work already queued on `stream`.
+        cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
+
+        reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz);
+        cudaSafeCall( cudaGetLastError() ); // check the error state right after the launch
+
+        if (stream == 0) // stream 0 was passed: wait here until the device is idle
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+    template void reprojectImageTo3D_gpu<uchar, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+    template void reprojectImageTo3D_gpu<short, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+    template void reprojectImageTo3D_gpu<short, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+
+    /////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
+
+    template <typename T> // maps a disparity d to a packed 32-bit color: hue derived from d, then HSV -> RGB
+    __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
+    {
+        unsigned int H = ((ndisp-d) * 240)/ndisp; // hue: 240 when d == 0, 0 when d == ndisp
+
+        unsigned int hi = (H/60) % 6; // which of the six 60-wide hue sectors H falls in
+        float f = H/60.f - H/60; // fractional position inside that sector (float quotient minus integer quotient)
+        float p = V * (1 - S);
+        float q = V * (1 - f * S);
+        float t = V * (1 - (1 - f) * S);
+
+        float3 res; // color stored as x = B, y = G, z = R (see how b, g, r are read back below)
+
+        if (hi == 0) // R = V, G = t, B = p
+        {
+            res.x = p;
+            res.y = t;
+            res.z = V;
+        }
+
+        if (hi == 1) // R = q, G = V, B = p
+        {
+            res.x = p;
+            res.y = V;
+            res.z = q;
+        }
+
+        if (hi == 2) // R = p, G = V, B = t
+        {
+            res.x = t;
+            res.y = V;
+            res.z = p;
+        }
+
+        if (hi == 3) // R = p, G = q, B = V
+        {
+            res.x = V;
+            res.y = q;
+            res.z = p;
+        }
+
+        if (hi == 4) // R = t, G = p, B = V
+        {
+            res.x = V;
+            res.y = p;
+            res.z = t;
+        }
+
+        if (hi == 5) // R = V, G = p, B = q
+        {
+            res.x = q;
+            res.y = p;
+            res.z = V;
+        }
+        const unsigned int b = (unsigned int)(::max(0.f, ::min(res.x, 1.f)) * 255.f); // clamp to [0, 1], then scale to 0..255
+        const unsigned int g = (unsigned int)(::max(0.f, ::min(res.y, 1.f)) * 255.f);
+        const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f);
+        const unsigned int a = 255U;
+
+        return (a << 24) + (r << 16) + (g << 8) + b; // alpha in bits 24-31, then R, G, and B in the low byte
+    }
+
+    __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
+    { // 8-bit disparity kernel: every thread colors four horizontally adjacent pixels of one row.
+        const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; // first of the 4 columns handled by this thread
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if(x < width && y < height)
+        {
+            uchar4 d4 = *(uchar4*)(disp + y * disp_step + x); // one 4-byte load; disp is uchar*, so the offset is in bytes
+            // NOTE(review): only x is tested against width, so the 4-wide load/store can reach up to 3 pixels past it -- confirm rows are padded.
+            uint4 res;
+            res.x = cvtPixel(d4.x, ndisp);
+            res.y = cvtPixel(d4.y, ndisp);
+            res.z = cvtPixel(d4.z, ndisp);
+            res.w = cvtPixel(d4.w, ndisp);
+
+            uint4* line = (uint4*)(out_image + y * out_step); // start of output row y, viewed as groups of four packed colors
+            line[x >> 2] = res;
+        }
+    }
+
+    __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
+    { // 16-bit disparity kernel: every thread colors two horizontally adjacent pixels of one row.
+        const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; // first of the 2 columns handled by this thread
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if(x < width && y < height)
+        {
+            short2 d2 = *(short2*)(disp + y * disp_step + x); // disp is short*, so disp_step counts elements, not bytes
+            // NOTE(review): only x is tested against width, so the 2-wide load/store can reach 1 pixel past it -- confirm rows are padded.
+            uint2 res;
+            res.x = cvtPixel(d2.x, ndisp);
+            res.y = cvtPixel(d2.y, ndisp);
+
+            uint2* line = (uint2*)(out_image + y * out_step); // start of output row y, viewed as pairs of packed colors
+            line[x >> 1] = res;
+        }
+    }
+
+
+    void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
+    { // Host-side launcher for the uchar drawColorDisp kernel.
+        dim3 threads(16, 16, 1);
+        dim3 grid(1, 1, 1);
+        grid.x = divUp(src.cols, threads.x << 2); // a thread handles 4 columns, so one block spans 64 columns
+        grid.y = divUp(src.rows, threads.y);
+
+        drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
+        cudaSafeCall( cudaGetLastError() ); // check the error state right after the launch
+
+        if (stream == 0) // stream 0 was passed: wait here until the device is idle
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
+    { // Host-side launcher for the short drawColorDisp kernel.
+        dim3 threads(32, 8, 1);
+        dim3 grid(1, 1, 1);
+        grid.x = divUp(src.cols, threads.x << 1); // a thread handles 2 columns, so one block spans 64 columns
+        grid.y = divUp(src.rows, threads.y);
+        // src.step is divided by sizeof(short) because the kernel adds disp_step to a short* pointer.
+        drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp);
+        cudaSafeCall( cudaGetLastError() ); // check the error state right after the launch
+
+        if (stream == 0) // stream 0 was passed: wait here until the device is idle
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+}}} // namespace cv { namespace gpu { namespace cudev
+
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpucalib3d/src/disparity_bilateral_filter.cpp b/modules/gpustereo/src/disparity_bilateral_filter.cpp
similarity index 100%
rename from modules/gpucalib3d/src/disparity_bilateral_filter.cpp
rename to modules/gpustereo/src/disparity_bilateral_filter.cpp
diff --git a/modules/gpucalib3d/src/precomp.cpp b/modules/gpustereo/src/precomp.cpp
similarity index 100%
rename from modules/gpucalib3d/src/precomp.cpp
rename to modules/gpustereo/src/precomp.cpp
diff --git a/modules/gpucalib3d/src/precomp.hpp b/modules/gpustereo/src/precomp.hpp
similarity index 94%
rename from modules/gpucalib3d/src/precomp.hpp
rename to modules/gpustereo/src/precomp.hpp
index 89396fd57..a552d5ff3 100644
--- a/modules/gpucalib3d/src/precomp.hpp
+++ b/modules/gpustereo/src/precomp.hpp
@@ -45,11 +45,7 @@
 
 #include <limits>
 
-#include "opencv2/gpucalib3d.hpp"
-#include "opencv2/gpuarithm.hpp"
-
-#include "opencv2/calib3d.hpp"
-#include "opencv2/imgproc.hpp"
+#include "opencv2/gpustereo.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
 
diff --git a/modules/gpucalib3d/src/stereobm.cpp b/modules/gpustereo/src/stereobm.cpp
similarity index 100%
rename from modules/gpucalib3d/src/stereobm.cpp
rename to modules/gpustereo/src/stereobm.cpp
diff --git a/modules/gpucalib3d/src/stereobp.cpp b/modules/gpustereo/src/stereobp.cpp
similarity index 100%
rename from modules/gpucalib3d/src/stereobp.cpp
rename to modules/gpustereo/src/stereobp.cpp
diff --git a/modules/gpucalib3d/src/stereocsbp.cpp b/modules/gpustereo/src/stereocsbp.cpp
similarity index 99%
rename from modules/gpucalib3d/src/stereocsbp.cpp
rename to modules/gpustereo/src/stereocsbp.cpp
index 431dfd38c..bd5ef4be9 100644
--- a/modules/gpucalib3d/src/stereocsbp.cpp
+++ b/modules/gpustereo/src/stereocsbp.cpp
@@ -196,7 +196,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
         for(int _r = 0; _r < 5; ++_r)
         {
             *buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5);
-            assert(buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * rthis.nr_plane);
+            CV_DbgAssert(buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * rthis.nr_plane);
         }
     };
 
diff --git a/modules/gpustereo/src/util.cpp b/modules/gpustereo/src/util.cpp
new file mode 100644
index 000000000..9bff6fff2
--- /dev/null
+++ b/modules/gpustereo/src/util.cpp
@@ -0,0 +1,117 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+
+void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, int, Stream&) { throw_no_cuda(); } // stub used when HAVE_CUDA is undefined or CUDA_DISABLER is defined
+void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); } // stub used when HAVE_CUDA is undefined or CUDA_DISABLER is defined
+
+#else
+
+////////////////////////////////////////////////////////////////////////
+// reprojectImageTo3D
+
+namespace cv { namespace gpu { namespace cudev
+{
+    template <typename T, typename D> // declaration only; explicit instantiations for uchar/short x float3/float4 are in src/cuda/util.cu
+    void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+}}}
+
+void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q, int dst_cn, Stream& stream)
+{ // Validates the inputs, allocates xyz as a dst_cn-channel CV_32F matrix and dispatches to the matching GPU routine.
+    using namespace cv::gpu::cudev;
+
+    typedef void (*func_t)(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
+    static const func_t funcs[2][4] = // indexed [dst_cn == 4][disp.type()]; only columns 0 (uchar) and 3 (short) are non-null
+    {
+        {reprojectImageTo3D_gpu<uchar, float3>, 0, 0, reprojectImageTo3D_gpu<short, float3>},
+        {reprojectImageTo3D_gpu<uchar, float4>, 0, 0, reprojectImageTo3D_gpu<short, float4>}
+    };
+
+    CV_Assert(disp.type() == CV_8U || disp.type() == CV_16S); // presumably also what keeps the lookup off the null table entries
+    CV_Assert(Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4 && Q.isContinuous()); // Q is handed on below as a plain float pointer
+    CV_Assert(dst_cn == 3 || dst_cn == 4);
+
+    xyz.create(disp.size(), CV_MAKE_TYPE(CV_32F, dst_cn));
+
+    funcs[dst_cn == 4][disp.type()](disp, xyz, Q.ptr<float>(), StreamAccessor::getStream(stream));
+}
+
+////////////////////////////////////////////////////////////////////////
+// drawColorDisp
+
+namespace cv { namespace gpu { namespace cudev
+{
+    void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream); // overload taking byte-element source
+    void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream); // overload taking short-element source
+}}}
+
+namespace
+{
+    template <typename T>
+    void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
+    { // Allocates dst as CV_8UC4 with src's size, then calls the drawColorDisp_gpu overload selected by T.
+        using namespace ::cv::gpu::cudev;
+
+        dst.create(src.size(), CV_8UC4);
+
+        drawColorDisp_gpu((PtrStepSz<T>)src, dst, ndisp, stream);
+    }
+
+    typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);
+    // Dispatch table indexed by src.type() in cv::gpu::drawColorDisp; only entries 0 (unsigned char) and 3 (short) are non-null.
+    const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};
+}
+
+void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)
+{ // Colors a CV_8U or CV_16S disparity map by dispatching on src.type(); dst is allocated by the selected caller.
+    CV_Assert(src.type() == CV_8U || src.type() == CV_16S);
+    // NOTE(review): ndisp is forwarded unchecked and the device code divides by it -- confirm callers always pass ndisp > 0.
+    drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));
+}
+
+#endif
diff --git a/modules/gpucalib3d/test/test_main.cpp b/modules/gpustereo/test/test_main.cpp
similarity index 100%
rename from modules/gpucalib3d/test/test_main.cpp
rename to modules/gpustereo/test/test_main.cpp
diff --git a/modules/gpucalib3d/test/test_precomp.cpp b/modules/gpustereo/test/test_precomp.cpp
similarity index 100%
rename from modules/gpucalib3d/test/test_precomp.cpp
rename to modules/gpustereo/test/test_precomp.cpp
diff --git a/modules/gpucalib3d/test/test_precomp.hpp b/modules/gpustereo/test/test_precomp.hpp
similarity index 98%
rename from modules/gpucalib3d/test/test_precomp.hpp
rename to modules/gpustereo/test/test_precomp.hpp
index 8c53f4786..d55b1ec0d 100644
--- a/modules/gpucalib3d/test/test_precomp.hpp
+++ b/modules/gpustereo/test/test_precomp.hpp
@@ -54,8 +54,7 @@
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_test.hpp"
 
-#include "opencv2/gpucalib3d.hpp"
-
+#include "opencv2/gpustereo.hpp"
 #include "opencv2/calib3d.hpp"
 
 #endif
diff --git a/modules/gpustereo/test/test_stereo.cpp b/modules/gpustereo/test/test_stereo.cpp
new file mode 100644
index 000000000..0ead03dc5
--- /dev/null
+++ b/modules/gpustereo/test/test_stereo.cpp
@@ -0,0 +1,207 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+//////////////////////////////////////////////////////////////////////////
+// StereoBM
+
+struct StereoBM : testing::TestWithParam<cv::gpu::DeviceInfo>
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+        // Select the device supplied as the test parameter before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(StereoBM, Regression)
+{
+    cv::Mat left_image  = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat disp_gold   = readImage("stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
+    // Stop here if any of the three images came back empty.
+    ASSERT_FALSE(left_image.empty());
+    ASSERT_FALSE(right_image.empty());
+    ASSERT_FALSE(disp_gold.empty());
+
+    cv::gpu::StereoBM_GPU bm(0, 128, 19);
+    cv::gpu::GpuMat disp;
+
+    bm(loadMat(left_image), loadMat(right_image), disp);
+    // The GPU result is compared against the stored disparity map with tolerance 0.0.
+    EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Stereo, StereoBM, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////////
+// StereoBeliefPropagation
+
+struct StereoBeliefPropagation : testing::TestWithParam<cv::gpu::DeviceInfo>
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+        // Select the device supplied as the test parameter before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(StereoBeliefPropagation, Regression)
+{
+    cv::Mat left_image  = readImage("stereobp/aloe-L.png");
+    cv::Mat right_image = readImage("stereobp/aloe-R.png");
+    cv::Mat disp_gold   = readImage("stereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
+    // Stop here if any of the three images came back empty.
+    ASSERT_FALSE(left_image.empty());
+    ASSERT_FALSE(right_image.empty());
+    ASSERT_FALSE(disp_gold.empty());
+
+    cv::gpu::StereoBeliefPropagation bp(64, 8, 2, 25, 0.1f, 15, 1, CV_16S);
+    cv::gpu::GpuMat disp;
+
+    bp(loadMat(left_image), loadMat(right_image), disp);
+    // Copy the result into a cv::Mat and convert it to the gold image's depth before comparing.
+    cv::Mat h_disp(disp);
+    h_disp.convertTo(h_disp, disp_gold.depth());
+
+    EXPECT_MAT_NEAR(disp_gold, h_disp, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Stereo, StereoBeliefPropagation, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////////
+// StereoConstantSpaceBP
+
+struct StereoConstantSpaceBP : testing::TestWithParam<cv::gpu::DeviceInfo>
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+        // Select the device supplied as the test parameter before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(StereoConstantSpaceBP, Regression)
+{
+    cv::Mat left_image  = readImage("csstereobp/aloe-L.png");
+    cv::Mat right_image = readImage("csstereobp/aloe-R.png");
+
+    cv::Mat disp_gold;
+    // The reference map depends on the device: FEATURE_SET_COMPUTE_20 devices use aloe-disp.png, all others aloe-disp_CC1X.png.
+    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+        disp_gold = readImage("csstereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
+    else
+        disp_gold = readImage("csstereobp/aloe-disp_CC1X.png", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(left_image.empty());
+    ASSERT_FALSE(right_image.empty());
+    ASSERT_FALSE(disp_gold.empty());
+
+    cv::gpu::StereoConstantSpaceBP csbp(128, 16, 4, 4);
+    cv::gpu::GpuMat disp;
+
+    csbp(loadMat(left_image), loadMat(right_image), disp);
+    // Copy the result into a cv::Mat and convert it to the gold image's depth before comparing.
+    cv::Mat h_disp(disp);
+    h_disp.convertTo(h_disp, disp_gold.depth());
+
+    EXPECT_MAT_NEAR(disp_gold, h_disp, 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Stereo, StereoConstantSpaceBP, ALL_DEVICES);
+
+////////////////////////////////////////////////////////////////////////////////
+// reprojectImageTo3D
+
+PARAM_TEST_CASE(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // Select the device supplied as the first test parameter before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(ReprojectImageTo3D, Accuracy)
+{
+    cv::Mat disp = randomMat(size, depth, 5.0, 30.0);
+    cv::Mat Q = randomMat(cv::Size(4, 4), CV_32FC1, 0.1, 1.0);
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::reprojectImageTo3D(loadMat(disp, useRoi), dst, Q, 3);
+    // Reference result: cv::reprojectImageTo3D run on the same disparity map and Q matrix.
+    cv::Mat dst_gold;
+    cv::reprojectImageTo3D(disp, dst_gold, Q, false);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Stereo, ReprojectImageTo3D, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16S)),
+    WHOLE_SUBMAT));
+
+#endif // HAVE_CUDA
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index d1807e929..ea1b3e102 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -23,7 +23,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuvideo/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpucalib3d/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpustereo/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuobjdetect/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 670c71c9e..f03af5605 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -4,7 +4,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_nonfree opencv_softcascade opencv_superres
                                      opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
                                      opencv_gpufeatures2d opencv_gpuvideo opencv_gpuobjdetect
-                                     opencv_gpucalib3d opencv_gpulegacy)
+                                     opencv_gpustereo opencv_gpulegacy)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 

From eda124ec32436505c72721c136681d069e7d8764 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 11 Apr 2013 12:51:00 +0400
Subject: [PATCH 30/49] removed gpuobjdetect module

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/doc/gpu.rst                       |   1 +
 .../doc/object_detection.rst                  |   0
 modules/gpu/include/opencv2/gpu.hpp           | 121 +++++++++++-
 .../perf/perf_objdetect.cpp                   |   0
 modules/gpu/perf/perf_precomp.hpp             |   1 +
 .../src/cascadeclassifier.cpp                 |  35 +++-
 modules/{gpuobjdetect => gpu}/src/cuda/hog.cu |   0
 modules/{gpuobjdetect => gpu}/src/cuda/lbp.cu |   0
 .../{gpuobjdetect => gpu}/src/cuda/lbp.hpp    |   0
 modules/{gpuobjdetect => gpu}/src/hog.cpp     |   0
 modules/gpu/src/precomp.hpp                   |  11 ++
 .../test/test_objdetect.cpp                   |   0
 modules/gpu/test/test_precomp.hpp             |   5 +-
 modules/gpuobjdetect/CMakeLists.txt           |   9 -
 modules/gpuobjdetect/doc/gpuobjdetect.rst     |   8 -
 .../include/opencv2/gpuobjdetect.hpp          | 172 ------------------
 modules/gpuobjdetect/perf/perf_main.cpp       |  47 -----
 modules/gpuobjdetect/perf/perf_precomp.cpp    |  43 -----
 modules/gpuobjdetect/perf/perf_precomp.hpp    |  65 -------
 modules/gpuobjdetect/src/precomp.cpp          |  43 -----
 modules/gpuobjdetect/src/precomp.hpp          |  61 -------
 modules/gpuobjdetect/test/test_main.cpp       |  45 -----
 modules/gpuobjdetect/test/test_precomp.cpp    |  43 -----
 modules/gpuobjdetect/test/test_precomp.hpp    |  63 -------
 samples/cpp/CMakeLists.txt                    |   1 -
 samples/gpu/CMakeLists.txt                    |   2 +-
 27 files changed, 172 insertions(+), 606 deletions(-)
 rename modules/{gpuobjdetect => gpu}/doc/object_detection.rst (100%)
 rename modules/{gpuobjdetect => gpu}/perf/perf_objdetect.cpp (100%)
 rename modules/{gpuobjdetect => gpu}/src/cascadeclassifier.cpp (98%)
 rename modules/{gpuobjdetect => gpu}/src/cuda/hog.cu (100%)
 rename modules/{gpuobjdetect => gpu}/src/cuda/lbp.cu (100%)
 rename modules/{gpuobjdetect => gpu}/src/cuda/lbp.hpp (100%)
 rename modules/{gpuobjdetect => gpu}/src/hog.cpp (100%)
 rename modules/{gpuobjdetect => gpu}/test/test_objdetect.cpp (100%)
 delete mode 100644 modules/gpuobjdetect/CMakeLists.txt
 delete mode 100644 modules/gpuobjdetect/doc/gpuobjdetect.rst
 delete mode 100644 modules/gpuobjdetect/include/opencv2/gpuobjdetect.hpp
 delete mode 100644 modules/gpuobjdetect/perf/perf_main.cpp
 delete mode 100644 modules/gpuobjdetect/perf/perf_precomp.cpp
 delete mode 100644 modules/gpuobjdetect/perf/perf_precomp.hpp
 delete mode 100644 modules/gpuobjdetect/src/precomp.cpp
 delete mode 100644 modules/gpuobjdetect/src/precomp.hpp
 delete mode 100644 modules/gpuobjdetect/test/test_main.cpp
 delete mode 100644 modules/gpuobjdetect/test/test_precomp.cpp
 delete mode 100644 modules/gpuobjdetect/test/test_precomp.hpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index dec43263a..662a652b9 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -7,7 +7,7 @@ set(the_description "GPU-accelerated Computer Vision")
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
 ocv_define_module(gpu opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
-                      opencv_gpufeatures2d opencv_gpuvideo opencv_gpustereo opencv_gpuobjdetect)
+                      opencv_gpufeatures2d opencv_gpuvideo opencv_gpustereo)
 
 if(HAVE_CUDA)
   add_subdirectory(perf4au)
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index d98f90d3b..bc3b9bdb1 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -8,3 +8,4 @@ gpu. GPU-accelerated Computer Vision
     introduction
     initalization_and_information
     data_structures
+    object_detection
diff --git a/modules/gpuobjdetect/doc/object_detection.rst b/modules/gpu/doc/object_detection.rst
similarity index 100%
rename from modules/gpuobjdetect/doc/object_detection.rst
rename to modules/gpu/doc/object_detection.rst
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 72b3f49a2..db6157ab7 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -51,10 +51,129 @@
 #include "opencv2/gpufeatures2d.hpp"
 #include "opencv2/gpuvideo.hpp"
 #include "opencv2/gpustereo.hpp"
-#include "opencv2/gpuobjdetect.hpp"
 
 namespace cv { namespace gpu {
 
+//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+struct CV_EXPORTS HOGConfidence // element type of conf_out in HOGDescriptor::computeConfidenceMultiScale
+{
+   double scale;
+   std::vector<Point> locations;
+   std::vector<double> confidences;
+   std::vector<double> part_scores[4];
+};
+
+struct CV_EXPORTS HOGDescriptor
+{
+    enum { DEFAULT_WIN_SIGMA = -1 };
+    enum { DEFAULT_NLEVELS = 64 };
+    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+
+    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
+                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
+                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
+                  double threshold_L2hys=0.2, bool gamma_correction=true,
+                  int nlevels=DEFAULT_NLEVELS);
+
+    size_t getDescriptorSize() const;
+    size_t getBlockHistogramSize() const;
+
+    void setSVMDetector(const std::vector<float>& detector);
+
+    static std::vector<float> getDefaultPeopleDetector();
+    static std::vector<float> getPeopleDetector48x96();
+    static std::vector<float> getPeopleDetector64x128();
+
+    void detect(const GpuMat& img, std::vector<Point>& found_locations,
+                double hit_threshold=0, Size win_stride=Size(),
+                Size padding=Size());
+
+    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+                          double hit_threshold=0, Size win_stride=Size(),
+                          Size padding=Size(), double scale0=1.05,
+                          int group_threshold=2);
+
+    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
+                                                Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
+
+    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+                                                                    double hit_threshold, Size win_stride, Size padding,
+                                                                    std::vector<HOGConfidence> &conf_out, int group_threshold);
+
+    void getDescriptors(const GpuMat& img, Size win_stride,
+                        GpuMat& descriptors,
+                        int descr_format=DESCR_FORMAT_COL_BY_COL);
+
+    Size win_size;
+    Size block_size;
+    Size block_stride;
+    Size cell_size;
+    int nbins;
+    double win_sigma;
+    double threshold_L2hys;
+    bool gamma_correction;
+    int nlevels;
+
+protected:
+    void computeBlockHistograms(const GpuMat& img);
+    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
+
+    double getWinSigma() const;
+    bool checkDetectorSize() const;
+
+    static int numPartsWithin(int size, int part_size, int stride);
+    static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+    // Coefficients of the separating plane
+    float free_coef;
+    GpuMat detector;
+
+    // Results of the last classification step
+    GpuMat labels, labels_buf;
+    Mat labels_host;
+
+    // Results of the last histogram evaluation step
+    GpuMat block_hists, block_hists_buf;
+
+    // Gradient computation results
+    GpuMat grad, qangle, grad_buf, qangle_buf;
+
+    // Returns a sub-buffer of the required size; reallocates the buffer if necessary.
+    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
+    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
+
+    std::vector<GpuMat> image_scales;
+};
+
+// The cascade classifier class for object detection: supports old haar and new lbp xml formats, and nvbin for haar cascades only.
+class CV_EXPORTS CascadeClassifier_GPU
+{
+public:
+    CascadeClassifier_GPU();
+    CascadeClassifier_GPU(const String& filename);
+    ~CascadeClassifier_GPU();
+
+    bool empty() const;
+    bool load(const String& filename);
+    void release();
+
+    /* returns number of detected objects */
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
+
+    bool findLargestObject;
+    bool visualizeInPlace;
+
+    Size getClassifierSize() const;
+
+private:
+    struct CascadeClassifierImpl;
+    CascadeClassifierImpl* impl;
+    struct HaarCascade;
+    struct LbpCascade;
+    friend class CascadeClassifier_GPU_LBP;
+};
+
 //!performs labeling via graph cuts of a 2D regular 4-connected graph.
 CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
                          GpuMat& buf, Stream& stream = Stream::Null());
diff --git a/modules/gpuobjdetect/perf/perf_objdetect.cpp b/modules/gpu/perf/perf_objdetect.cpp
similarity index 100%
rename from modules/gpuobjdetect/perf/perf_objdetect.cpp
rename to modules/gpu/perf/perf_objdetect.cpp
diff --git a/modules/gpu/perf/perf_precomp.hpp b/modules/gpu/perf/perf_precomp.hpp
index 0329b3a98..5bfb14b52 100644
--- a/modules/gpu/perf/perf_precomp.hpp
+++ b/modules/gpu/perf/perf_precomp.hpp
@@ -56,6 +56,7 @@
 
 #include "opencv2/gpu.hpp"
 #include "opencv2/calib3d.hpp"
+#include "opencv2/objdetect.hpp"
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
diff --git a/modules/gpuobjdetect/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp
similarity index 98%
rename from modules/gpuobjdetect/src/cascadeclassifier.cpp
rename to modules/gpu/src/cascadeclassifier.cpp
index 0b9f9aafc..d9f34a54a 100644
--- a/modules/gpuobjdetect/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -41,8 +41,6 @@
 //M*/
 
 #include "precomp.hpp"
-#include <vector>
-#include <iostream>
 #include "opencv2/objdetect/objdetect_c.h"
 
 using namespace cv;
@@ -75,6 +73,37 @@ public:
     virtual bool read(const String& classifierAsXml) = 0;
 };
 
+#ifndef HAVE_OPENCV_GPULEGACY
+// Stub for builds without opencv_gpulegacy: same CascadeClassifierImpl interface as the real HaarCascade, but every member calls throw_no_cuda().
+struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
+{
+public:
+    HaarCascade()
+    {
+        throw_no_cuda();
+    }
+
+    unsigned int process(const GpuMat&, GpuMat&, float, int, bool, bool, cv::Size, cv::Size)
+    {
+        throw_no_cuda();
+        return 0; // placeholder value for the declared return type
+    }
+
+    cv::Size getClassifierCvSize() const
+    {
+        throw_no_cuda();
+        return cv::Size(); // placeholder value for the declared return type
+    }
+
+    bool read(const String&)
+    {
+        throw_no_cuda();
+        return false; // placeholder value for the declared return type
+    }
+};
+
+#else
+
 struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
 {
 public:
@@ -284,6 +313,8 @@ private:
     virtual ~HaarCascade(){}
 };
 
+#endif
+
 cv::Size operator -(const cv::Size& a, const cv::Size& b)
 {
     return cv::Size(a.width - b.width, a.height - b.height);
diff --git a/modules/gpuobjdetect/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu
similarity index 100%
rename from modules/gpuobjdetect/src/cuda/hog.cu
rename to modules/gpu/src/cuda/hog.cu
diff --git a/modules/gpuobjdetect/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu
similarity index 100%
rename from modules/gpuobjdetect/src/cuda/lbp.cu
rename to modules/gpu/src/cuda/lbp.cu
diff --git a/modules/gpuobjdetect/src/cuda/lbp.hpp b/modules/gpu/src/cuda/lbp.hpp
similarity index 100%
rename from modules/gpuobjdetect/src/cuda/lbp.hpp
rename to modules/gpu/src/cuda/lbp.hpp
diff --git a/modules/gpuobjdetect/src/hog.cpp b/modules/gpu/src/hog.cpp
similarity index 100%
rename from modules/gpuobjdetect/src/hog.cpp
rename to modules/gpu/src/hog.cpp
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index c662cb1d7..0b468a3e9 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -43,9 +43,20 @@
 #ifndef __OPENCV_PRECOMP_H__
 #define __OPENCV_PRECOMP_H__
 
+#include <vector>
+#include <iostream>
+
 #include "opencv2/gpu.hpp"
 #include "opencv2/calib3d.hpp"
+#include "opencv2/objdetect.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPULEGACY
+#  include "opencv2/gpulegacy.hpp"
+#  include "opencv2/gpulegacy/private.hpp"
+#endif
+
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuobjdetect/test/test_objdetect.cpp b/modules/gpu/test/test_objdetect.cpp
similarity index 100%
rename from modules/gpuobjdetect/test/test_objdetect.cpp
rename to modules/gpu/test/test_objdetect.cpp
diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp
index 3177e88e7..f2b0bf405 100644
--- a/modules/gpu/test/test_precomp.hpp
+++ b/modules/gpu/test/test_precomp.hpp
@@ -51,12 +51,15 @@
 #ifndef __OPENCV_TEST_PRECOMP_HPP__
 #define __OPENCV_TEST_PRECOMP_HPP__
 
+#include <fstream>
+
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_test.hpp"
 
+#include "opencv2/gpu.hpp"
 #include "opencv2/core.hpp"
 #include "opencv2/core/opengl.hpp"
-#include "opencv2/gpu.hpp"
 #include "opencv2/calib3d.hpp"
+#include "opencv2/objdetect.hpp"
 
 #endif
diff --git a/modules/gpuobjdetect/CMakeLists.txt b/modules/gpuobjdetect/CMakeLists.txt
deleted file mode 100644
index bcc2242c5..000000000
--- a/modules/gpuobjdetect/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-if(ANDROID OR IOS)
-  ocv_module_disable(gpuobjdetect)
-endif()
-
-set(the_description "GPU-accelerated Object Detection")
-
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
-
-ocv_define_module(gpuobjdetect opencv_objdetect opencv_gpuwarping opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuobjdetect/doc/gpuobjdetect.rst b/modules/gpuobjdetect/doc/gpuobjdetect.rst
deleted file mode 100644
index c53225d2d..000000000
--- a/modules/gpuobjdetect/doc/gpuobjdetect.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-*************************************
-gpu. GPU-accelerated Object Detection
-*************************************
-
-.. toctree::
-    :maxdepth: 1
-
-    object_detection
diff --git a/modules/gpuobjdetect/include/opencv2/gpuobjdetect.hpp b/modules/gpuobjdetect/include/opencv2/gpuobjdetect.hpp
deleted file mode 100644
index ab665b3b7..000000000
--- a/modules/gpuobjdetect/include/opencv2/gpuobjdetect.hpp
+++ /dev/null
@@ -1,172 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_GPUOBJDETECT_HPP__
-#define __OPENCV_GPUOBJDETECT_HPP__
-
-#include "opencv2/core/gpumat.hpp"
-
-namespace cv { namespace gpu {
-
-//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
-struct CV_EXPORTS HOGConfidence
-{
-   double scale;
-   std::vector<Point> locations;
-   std::vector<double> confidences;
-   std::vector<double> part_scores[4];
-};
-
-struct CV_EXPORTS HOGDescriptor
-{
-    enum { DEFAULT_WIN_SIGMA = -1 };
-    enum { DEFAULT_NLEVELS = 64 };
-    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
-
-    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
-                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
-                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
-                  double threshold_L2hys=0.2, bool gamma_correction=true,
-                  int nlevels=DEFAULT_NLEVELS);
-
-    size_t getDescriptorSize() const;
-    size_t getBlockHistogramSize() const;
-
-    void setSVMDetector(const std::vector<float>& detector);
-
-    static std::vector<float> getDefaultPeopleDetector();
-    static std::vector<float> getPeopleDetector48x96();
-    static std::vector<float> getPeopleDetector64x128();
-
-    void detect(const GpuMat& img, std::vector<Point>& found_locations,
-                double hit_threshold=0, Size win_stride=Size(),
-                Size padding=Size());
-
-    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                          double hit_threshold=0, Size win_stride=Size(),
-                          Size padding=Size(), double scale0=1.05,
-                          int group_threshold=2);
-
-    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
-                                                Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
-
-    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                                                                    double hit_threshold, Size win_stride, Size padding,
-                                                                    std::vector<HOGConfidence> &conf_out, int group_threshold);
-
-    void getDescriptors(const GpuMat& img, Size win_stride,
-                        GpuMat& descriptors,
-                        int descr_format=DESCR_FORMAT_COL_BY_COL);
-
-    Size win_size;
-    Size block_size;
-    Size block_stride;
-    Size cell_size;
-    int nbins;
-    double win_sigma;
-    double threshold_L2hys;
-    bool gamma_correction;
-    int nlevels;
-
-protected:
-    void computeBlockHistograms(const GpuMat& img);
-    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
-
-    double getWinSigma() const;
-    bool checkDetectorSize() const;
-
-    static int numPartsWithin(int size, int part_size, int stride);
-    static Size numPartsWithin(Size size, Size part_size, Size stride);
-
-    // Coefficients of the separating plane
-    float free_coef;
-    GpuMat detector;
-
-    // Results of the last classification step
-    GpuMat labels, labels_buf;
-    Mat labels_host;
-
-    // Results of the last histogram evaluation step
-    GpuMat block_hists, block_hists_buf;
-
-    // Gradients conputation results
-    GpuMat grad, qangle, grad_buf, qangle_buf;
-
-    // returns subbuffer with required size, reallocates buffer if nessesary.
-    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
-    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
-
-    std::vector<GpuMat> image_scales;
-};
-
-// The cascade classifier class for object detection: supports old haar and new lbp xlm formats and nvbin for haar cascades olny.
-class CV_EXPORTS CascadeClassifier_GPU
-{
-public:
-    CascadeClassifier_GPU();
-    CascadeClassifier_GPU(const String& filename);
-    ~CascadeClassifier_GPU();
-
-    bool empty() const;
-    bool load(const String& filename);
-    void release();
-
-    /* returns number of detected objects */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
-
-    bool findLargestObject;
-    bool visualizeInPlace;
-
-    Size getClassifierSize() const;
-
-private:
-    struct CascadeClassifierImpl;
-    CascadeClassifierImpl* impl;
-    struct HaarCascade;
-    struct LbpCascade;
-    friend class CascadeClassifier_GPU_LBP;
-};
-
-}} // namespace cv { namespace gpu {
-
-#endif /* __OPENCV_GPUOBJDETECT_HPP__ */
diff --git a/modules/gpuobjdetect/perf/perf_main.cpp b/modules/gpuobjdetect/perf/perf_main.cpp
deleted file mode 100644
index b35791cda..000000000
--- a/modules/gpuobjdetect/perf/perf_main.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace perf;
-
-CV_PERF_TEST_MAIN(gpuarithm, printCudaInfo())
diff --git a/modules/gpuobjdetect/perf/perf_precomp.cpp b/modules/gpuobjdetect/perf/perf_precomp.cpp
deleted file mode 100644
index 81f16e8f1..000000000
--- a/modules/gpuobjdetect/perf/perf_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
diff --git a/modules/gpuobjdetect/perf/perf_precomp.hpp b/modules/gpuobjdetect/perf/perf_precomp.hpp
deleted file mode 100644
index 2a1acac7b..000000000
--- a/modules/gpuobjdetect/perf/perf_precomp.hpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-declarations"
-#  if defined __clang__ || defined __APPLE__
-#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
-#    pragma GCC diagnostic ignored "-Wextra"
-#  endif
-#endif
-
-#ifndef __OPENCV_PERF_PRECOMP_HPP__
-#define __OPENCV_PERF_PRECOMP_HPP__
-
-#include "opencv2/ts.hpp"
-#include "opencv2/ts/gpu_perf.hpp"
-
-#include "opencv2/gpuobjdetect.hpp"
-
-#include "opencv2/objdetect.hpp"
-
-#ifdef GTEST_CREATE_SHARED_LIBRARY
-#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
-#endif
-
-#endif
diff --git a/modules/gpuobjdetect/src/precomp.cpp b/modules/gpuobjdetect/src/precomp.cpp
deleted file mode 100644
index 3c01a2596..000000000
--- a/modules/gpuobjdetect/src/precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
diff --git a/modules/gpuobjdetect/src/precomp.hpp b/modules/gpuobjdetect/src/precomp.hpp
deleted file mode 100644
index 40fd6c46d..000000000
--- a/modules/gpuobjdetect/src/precomp.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_PRECOMP_H__
-#define __OPENCV_PRECOMP_H__
-
-#include "opencv2/gpuobjdetect.hpp"
-#include "opencv2/gpuwarping.hpp"
-#include "opencv2/gpuimgproc.hpp"
-#include "opencv2/gpuarithm.hpp"
-
-#include "opencv2/objdetect.hpp"
-
-#include "opencv2/core/gpu_private.hpp"
-
-#include "opencv2/opencv_modules.hpp"
-
-#ifdef HAVE_OPENCV_GPULEGACY
-#  include "opencv2/gpulegacy/private.hpp"
-#endif
-
-#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuobjdetect/test/test_main.cpp b/modules/gpuobjdetect/test/test_main.cpp
deleted file mode 100644
index eea3d7c00..000000000
--- a/modules/gpuobjdetect/test/test_main.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpuobjdetect/test/test_precomp.cpp b/modules/gpuobjdetect/test/test_precomp.cpp
deleted file mode 100644
index 0fb652180..000000000
--- a/modules/gpuobjdetect/test/test_precomp.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
diff --git a/modules/gpuobjdetect/test/test_precomp.hpp b/modules/gpuobjdetect/test/test_precomp.hpp
deleted file mode 100644
index 13527ba06..000000000
--- a/modules/gpuobjdetect/test/test_precomp.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-declarations"
-#  if defined __clang__ || defined __APPLE__
-#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
-#    pragma GCC diagnostic ignored "-Wextra"
-#  endif
-#endif
-
-#ifndef __OPENCV_TEST_PRECOMP_HPP__
-#define __OPENCV_TEST_PRECOMP_HPP__
-
-#include <fstream>
-
-#include "opencv2/ts.hpp"
-#include "opencv2/ts/gpu_test.hpp"
-
-#include "opencv2/gpuobjdetect.hpp"
-
-#include "opencv2/objdetect.hpp"
-
-#endif
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index ea1b3e102..84913d284 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -24,7 +24,6 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuvideo/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpustereo/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuobjdetect/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index f03af5605..7fed87be0 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -3,7 +3,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
                                      opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
-                                     opencv_gpufeatures2d opencv_gpuvideo opencv_gpuobjdetect
+                                     opencv_gpufeatures2d opencv_gpuvideo
                                      opencv_gpustereo opencv_gpulegacy)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})

From ac0f506d0e313a3aeaada67656eb195a0da3fb69 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 18:35:09 +0400
Subject: [PATCH 31/49] split gpuvideo onto gpuoptflow and gpubgsegm

---
 modules/gpu/CMakeLists.txt                    |   2 +-
 modules/gpu/include/opencv2/gpu.hpp           |   3 +-
 modules/gpubgsegm/CMakeLists.txt              |   9 +
 .../doc/background_segmentation.rst}          |   0
 modules/gpubgsegm/doc/gpubgsegm.rst           |   8 +
 .../include/opencv2/gpubgsegm.hpp}            | 266 +--------
 modules/gpubgsegm/perf/perf_bgfg.cpp          | 536 ++++++++++++++++++
 .../perf/perf_main.cpp                        |   0
 .../perf/perf_precomp.cpp                     |   0
 .../perf/perf_precomp.hpp                     |   3 +-
 .../{gpuvideo => gpubgsegm}/src/bgfg_gmg.cpp  |   0
 .../{gpuvideo => gpubgsegm}/src/bgfg_mog.cpp  |   0
 .../src/cuda/bgfg_gmg.cu                      |   0
 .../src/cuda/bgfg_mog.cu                      |   0
 .../src/cuda/fgd_bgfg.cu                      |   0
 .../src/cuda/fgd_bgfg_common.hpp              |   0
 .../{gpuvideo => gpubgsegm}/src/fgd_bgfg.cpp  |   0
 .../{gpuvideo => gpubgsegm}/src/precomp.cpp   |   0
 .../{gpuvideo => gpubgsegm}/src/precomp.hpp   |   3 +-
 .../test/test_bgfg.cpp                        |   0
 .../test/test_main.cpp                        |   0
 .../test/test_precomp.cpp                     |   0
 modules/gpubgsegm/test/test_precomp.hpp       |  64 +++
 modules/gpuimgproc/doc/image_processing.rst   |  31 +
 modules/gpuimgproc/perf/perf_imgproc.cpp      |  39 ++
 modules/gpuoptflow/CMakeLists.txt             |   9 +
 modules/gpuoptflow/doc/gpuoptflow.rst         |   8 +
 modules/gpuoptflow/doc/optflow.rst            | 238 ++++++++
 .../gpuoptflow/include/opencv2/gpuoptflow.hpp | 310 ++++++++++
 modules/gpuoptflow/perf/perf_main.cpp         |  47 ++
 .../perf/perf_optflow.cpp}                    | 528 -----------------
 modules/gpuoptflow/perf/perf_precomp.cpp      |  43 ++
 modules/gpuoptflow/perf/perf_precomp.hpp      |  66 +++
 .../src/cuda/optflowbm.cu                     |   0
 .../src/cuda/optical_flow.cu                  |   0
 .../src/cuda/optical_flow_farneback.cu        |   0
 .../src/cuda/pyrlk.cu                         |   0
 .../src/cuda/tvl1flow.cu                      |   0
 .../src/optflowbm.cpp                         |   0
 .../src/optical_flow.cpp                      |   0
 .../src/optical_flow_farneback.cpp            |   0
 modules/gpuoptflow/src/precomp.cpp            |  43 ++
 modules/gpuoptflow/src/precomp.hpp            |  62 ++
 .../{gpuvideo => gpuoptflow}/src/pyrlk.cpp    |   0
 .../{gpuvideo => gpuoptflow}/src/tvl1flow.cpp |   0
 modules/gpuoptflow/test/test_main.cpp         |  45 ++
 .../test/test_optflow.cpp                     |  82 ---
 modules/gpuoptflow/test/test_precomp.cpp      |  43 ++
 .../test/test_precomp.hpp                     |   3 +-
 modules/gpuvideo/CMakeLists.txt               |   9 -
 modules/gpuvideo/doc/gpuvideo.rst             |   8 -
 samples/cpp/CMakeLists.txt                    |   3 +-
 samples/gpu/CMakeLists.txt                    |   2 +-
 53 files changed, 1613 insertions(+), 900 deletions(-)
 create mode 100644 modules/gpubgsegm/CMakeLists.txt
 rename modules/{gpuvideo/doc/video.rst => gpubgsegm/doc/background_segmentation.rst} (100%)
 create mode 100644 modules/gpubgsegm/doc/gpubgsegm.rst
 rename modules/{gpuvideo/include/opencv2/gpuvideo.hpp => gpubgsegm/include/opencv2/gpubgsegm.hpp} (60%)
 create mode 100644 modules/gpubgsegm/perf/perf_bgfg.cpp
 rename modules/{gpuvideo => gpubgsegm}/perf/perf_main.cpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/perf/perf_precomp.cpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/perf/perf_precomp.hpp (97%)
 rename modules/{gpuvideo => gpubgsegm}/src/bgfg_gmg.cpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/src/bgfg_mog.cpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/src/cuda/bgfg_gmg.cu (100%)
 rename modules/{gpuvideo => gpubgsegm}/src/cuda/bgfg_mog.cu (100%)
 rename modules/{gpuvideo => gpubgsegm}/src/cuda/fgd_bgfg.cu (100%)
 rename modules/{gpuvideo => gpubgsegm}/src/cuda/fgd_bgfg_common.hpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/src/fgd_bgfg.cpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/src/precomp.cpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/src/precomp.hpp (97%)
 rename modules/{gpuvideo => gpubgsegm}/test/test_bgfg.cpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/test/test_main.cpp (100%)
 rename modules/{gpuvideo => gpubgsegm}/test/test_precomp.cpp (100%)
 create mode 100644 modules/gpubgsegm/test/test_precomp.hpp
 create mode 100644 modules/gpuoptflow/CMakeLists.txt
 create mode 100644 modules/gpuoptflow/doc/gpuoptflow.rst
 create mode 100644 modules/gpuoptflow/doc/optflow.rst
 create mode 100644 modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
 create mode 100644 modules/gpuoptflow/perf/perf_main.cpp
 rename modules/{gpuvideo/perf/perf_video.cpp => gpuoptflow/perf/perf_optflow.cpp} (53%)
 create mode 100644 modules/gpuoptflow/perf/perf_precomp.cpp
 create mode 100644 modules/gpuoptflow/perf/perf_precomp.hpp
 rename modules/{gpuvideo => gpuoptflow}/src/cuda/optflowbm.cu (100%)
 rename modules/{gpuvideo => gpuoptflow}/src/cuda/optical_flow.cu (100%)
 rename modules/{gpuvideo => gpuoptflow}/src/cuda/optical_flow_farneback.cu (100%)
 rename modules/{gpuvideo => gpuoptflow}/src/cuda/pyrlk.cu (100%)
 rename modules/{gpuvideo => gpuoptflow}/src/cuda/tvl1flow.cu (100%)
 rename modules/{gpuvideo => gpuoptflow}/src/optflowbm.cpp (100%)
 rename modules/{gpuvideo => gpuoptflow}/src/optical_flow.cpp (100%)
 rename modules/{gpuvideo => gpuoptflow}/src/optical_flow_farneback.cpp (100%)
 create mode 100644 modules/gpuoptflow/src/precomp.cpp
 create mode 100644 modules/gpuoptflow/src/precomp.hpp
 rename modules/{gpuvideo => gpuoptflow}/src/pyrlk.cpp (100%)
 rename modules/{gpuvideo => gpuoptflow}/src/tvl1flow.cpp (100%)
 create mode 100644 modules/gpuoptflow/test/test_main.cpp
 rename modules/{gpuvideo => gpuoptflow}/test/test_optflow.cpp (89%)
 create mode 100644 modules/gpuoptflow/test/test_precomp.cpp
 rename modules/{gpuvideo => gpuoptflow}/test/test_precomp.hpp (98%)
 delete mode 100644 modules/gpuvideo/CMakeLists.txt
 delete mode 100644 modules/gpuvideo/doc/gpuvideo.rst

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 662a652b9..1d108b00e 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -7,7 +7,7 @@ set(the_description "GPU-accelerated Computer Vision")
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
 ocv_define_module(gpu opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
-                      opencv_gpufeatures2d opencv_gpuvideo opencv_gpustereo)
+                      opencv_gpufeatures2d opencv_gpuoptflow opencv_gpubgsegm opencv_gpustereo)
 
 if(HAVE_CUDA)
   add_subdirectory(perf4au)
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index db6157ab7..166a360a1 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -49,7 +49,8 @@
 #include "opencv2/gpuwarping.hpp"
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpufeatures2d.hpp"
-#include "opencv2/gpuvideo.hpp"
+#include "opencv2/gpuoptflow.hpp"
+#include "opencv2/gpubgsegm.hpp"
 #include "opencv2/gpustereo.hpp"
 
 namespace cv { namespace gpu {
diff --git a/modules/gpubgsegm/CMakeLists.txt b/modules/gpubgsegm/CMakeLists.txt
new file mode 100644
index 000000000..4e3266b7e
--- /dev/null
+++ b/modules/gpubgsegm/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpubgsegm)
+endif()
+
+set(the_description "GPU-accelerated Background Segmentation")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+
+ocv_define_module(gpubgsegm opencv_video opencv_legacy opencv_gpufilters opencv_gpuimgproc)
diff --git a/modules/gpuvideo/doc/video.rst b/modules/gpubgsegm/doc/background_segmentation.rst
similarity index 100%
rename from modules/gpuvideo/doc/video.rst
rename to modules/gpubgsegm/doc/background_segmentation.rst
diff --git a/modules/gpubgsegm/doc/gpubgsegm.rst b/modules/gpubgsegm/doc/gpubgsegm.rst
new file mode 100644
index 000000000..f4988cb21
--- /dev/null
+++ b/modules/gpubgsegm/doc/gpubgsegm.rst
@@ -0,0 +1,8 @@
+********************************************
+gpu. GPU-accelerated Background Segmentation
+********************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    background_segmentation
diff --git a/modules/gpuvideo/include/opencv2/gpuvideo.hpp b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
similarity index 60%
rename from modules/gpuvideo/include/opencv2/gpuvideo.hpp
rename to modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
index de3cf7fa7..62d4d9a4c 100644
--- a/modules/gpuvideo/include/opencv2/gpuvideo.hpp
+++ b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_GPUVIDEO_HPP__
-#define __OPENCV_GPUVIDEO_HPP__
+#ifndef __OPENCV_GPUBGSEGM_HPP__
+#define __OPENCV_GPUBGSEGM_HPP__
 
 #include <memory>
 
@@ -50,266 +50,6 @@
 
 namespace cv { namespace gpu {
 
-////////////////////////////////// Optical Flow //////////////////////////////////////////
-
-class CV_EXPORTS BroxOpticalFlow
-{
-public:
-    BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
-        alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
-        inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
-    {
-    }
-
-    //! Compute optical flow
-    //! frame0 - source frame (supports only CV_32FC1 type)
-    //! frame1 - frame to track (with the same size and type as frame0)
-    //! u      - flow horizontal component (along x axis)
-    //! v      - flow vertical component (along y axis)
-    void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
-
-    //! flow smoothness
-    float alpha;
-
-    //! gradient constancy importance
-    float gamma;
-
-    //! pyramid scale factor
-    float scale_factor;
-
-    //! number of lagged non-linearity iterations (inner loop)
-    int inner_iterations;
-
-    //! number of warping iterations (number of pyramid levels)
-    int outer_iterations;
-
-    //! number of linear system solver iterations
-    int solver_iterations;
-
-    GpuMat buf;
-};
-
-class CV_EXPORTS PyrLKOpticalFlow
-{
-public:
-    PyrLKOpticalFlow();
-
-    void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
-        GpuMat& status, GpuMat* err = 0);
-
-    void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
-
-    void releaseMemory();
-
-    Size winSize;
-    int maxLevel;
-    int iters;
-    bool useInitialFlow;
-
-private:
-    std::vector<GpuMat> prevPyr_;
-    std::vector<GpuMat> nextPyr_;
-
-    GpuMat buf_;
-
-    GpuMat uPyr_[2];
-    GpuMat vPyr_[2];
-};
-
-class CV_EXPORTS FarnebackOpticalFlow
-{
-public:
-    FarnebackOpticalFlow()
-    {
-        numLevels = 5;
-        pyrScale = 0.5;
-        fastPyramids = false;
-        winSize = 13;
-        numIters = 10;
-        polyN = 5;
-        polySigma = 1.1;
-        flags = 0;
-    }
-
-    int numLevels;
-    double pyrScale;
-    bool fastPyramids;
-    int winSize;
-    int numIters;
-    int polyN;
-    double polySigma;
-    int flags;
-
-    void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
-
-    void releaseMemory()
-    {
-        frames_[0].release();
-        frames_[1].release();
-        pyrLevel_[0].release();
-        pyrLevel_[1].release();
-        M_.release();
-        bufM_.release();
-        R_[0].release();
-        R_[1].release();
-        blurredFrame_[0].release();
-        blurredFrame_[1].release();
-        pyramid0_.clear();
-        pyramid1_.clear();
-    }
-
-private:
-    void prepareGaussian(
-            int n, double sigma, float *g, float *xg, float *xxg,
-            double &ig11, double &ig03, double &ig33, double &ig55);
-
-    void setPolynomialExpansionConsts(int n, double sigma);
-
-    void updateFlow_boxFilter(
-            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
-            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
-
-    void updateFlow_gaussianBlur(
-            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
-            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
-
-    GpuMat frames_[2];
-    GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
-    std::vector<GpuMat> pyramid0_, pyramid1_;
-};
-
-// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
-//
-// see reference:
-//   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
-//   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
-class CV_EXPORTS OpticalFlowDual_TVL1_GPU
-{
-public:
-    OpticalFlowDual_TVL1_GPU();
-
-    void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
-
-    void collectGarbage();
-
-    /**
-     * Time step of the numerical scheme.
-     */
-    double tau;
-
-    /**
-     * Weight parameter for the data term, attachment parameter.
-     * This is the most relevant parameter, which determines the smoothness of the output.
-     * The smaller this parameter is, the smoother the solutions we obtain.
-     * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
-     */
-    double lambda;
-
-    /**
-     * Weight parameter for (u - v)^2, tightness parameter.
-     * It serves as a link between the attachment and the regularization terms.
-     * In theory, it should have a small value in order to maintain both parts in correspondence.
-     * The method is stable for a large range of values of this parameter.
-     */
-    double theta;
-
-    /**
-     * Number of scales used to create the pyramid of images.
-     */
-    int nscales;
-
-    /**
-     * Number of warpings per scale.
-     * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
-     * This is a parameter that assures the stability of the method.
-     * It also affects the running time, so it is a compromise between speed and accuracy.
-     */
-    int warps;
-
-    /**
-     * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
-     * A small value will yield more accurate solutions at the expense of a slower convergence.
-     */
-    double epsilon;
-
-    /**
-     * Stopping criterion iterations number used in the numerical scheme.
-     */
-    int iterations;
-
-    double scaleStep;
-
-    bool useInitialFlow;
-
-private:
-    void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2);
-
-    std::vector<GpuMat> I0s;
-    std::vector<GpuMat> I1s;
-    std::vector<GpuMat> u1s;
-    std::vector<GpuMat> u2s;
-
-    GpuMat I1x_buf;
-    GpuMat I1y_buf;
-
-    GpuMat I1w_buf;
-    GpuMat I1wx_buf;
-    GpuMat I1wy_buf;
-
-    GpuMat grad_buf;
-    GpuMat rho_c_buf;
-
-    GpuMat p11_buf;
-    GpuMat p12_buf;
-    GpuMat p21_buf;
-    GpuMat p22_buf;
-
-    GpuMat diff_buf;
-    GpuMat norm_buf;
-};
-
-//! Calculates optical flow for 2 images using block matching algorithm */
-CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr,
-                                  Size block_size, Size shift_size, Size max_range, bool use_previous,
-                                  GpuMat& velx, GpuMat& vely, GpuMat& buf,
-                                  Stream& stream = Stream::Null());
-
-class CV_EXPORTS FastOpticalFlowBM
-{
-public:
-    void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null());
-
-private:
-    GpuMat buffer;
-    GpuMat extended_I0;
-    GpuMat extended_I1;
-};
-
-
-//! Interpolate frames (images) using provided optical flow (displacement field).
-//! frame0   - frame 0 (32-bit floating point images, single channel)
-//! frame1   - frame 1 (the same type and size)
-//! fu       - forward horizontal displacement
-//! fv       - forward vertical displacement
-//! bu       - backward horizontal displacement
-//! bv       - backward vertical displacement
-//! pos      - new frame position
-//! newFrame - new frame
-//! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat;
-//!            occlusion masks            0, occlusion masks            1,
-//!            interpolated forward flow  0, interpolated forward flow  1,
-//!            interpolated backward flow 0, interpolated backward flow 1
-//!
-CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
-                                  const GpuMat& fu, const GpuMat& fv,
-                                  const GpuMat& bu, const GpuMat& bv,
-                                  float pos, GpuMat& newFrame, GpuMat& buf,
-                                  Stream& stream = Stream::Null());
-
-CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
-
-//////////////////////// Background/foreground segmentation ////////////////////////
-
 // Foreground Object Detection from Videos Containing Complex Background.
 // Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian.
 // ACM MM2003 9p
@@ -583,4 +323,4 @@ private:
 
 }} // namespace cv { namespace gpu {
 
-#endif /* __OPENCV_GPUVIDEO_HPP__ */
+#endif /* __OPENCV_GPUBGSEGM_HPP__ */
diff --git a/modules/gpubgsegm/perf/perf_bgfg.cpp b/modules/gpubgsegm/perf/perf_bgfg.cpp
new file mode 100644
index 000000000..23db3d899
--- /dev/null
+++ b/modules/gpubgsegm/perf/perf_bgfg.cpp
@@ -0,0 +1,536 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+#if defined(HAVE_XINE)         || \
+    defined(HAVE_GSTREAMER)    || \
+    defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_AVFOUNDATION) || \
+    defined(HAVE_FFMPEG)       || \
+    defined(WIN32) /* assume that we have ffmpeg */
+
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
+#else
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
+#endif
+
+namespace cv
+{
+    template<> void Ptr<CvBGStatModel>::delete_obj() // explicit specialization for the C background model type
+    {
+        cvReleaseBGStatModel(&obj); // the held model is released through cvReleaseBGStatModel
+    }
+}
+
+//////////////////////////////////////////////////////
+// FGDStatModel
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+DEF_PARAM_TEST_1(Video, string);
+// Times FGDStatModel::update (GPU path) or cvUpdateBGStatModel (CPU path) over 10 consecutive video frames.
+PERF_TEST_P(Video, Video_FGDStatModel,
+            Values(string("gpu/video/768x576.avi")))
+{
+    declare.time(60);
+
+    const string inputFile = perf::TestBase::getDataPath(GetParam());
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame; // the first frame is only used to create the model
+    ASSERT_FALSE(frame.empty());
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat d_frame(frame);
+
+        cv::gpu::FGDStatModel d_model(4);
+        d_model.create(d_frame);
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            d_frame.upload(frame);
+            // frame decoding and upload happen before the timer starts; only update() is timed
+            startTimer(); next();
+            d_model.update(d_frame);
+            stopTimer();
+        }
+
+        const cv::gpu::GpuMat background = d_model.background;
+        const cv::gpu::GpuMat foreground = d_model.foreground;
+        // both model outputs are checked with a 1e-2 relative tolerance
+        GPU_SANITY_CHECK(background, 1e-2, ERROR_RELATIVE);
+        GPU_SANITY_CHECK(foreground, 1e-2, ERROR_RELATIVE);
+    }
+    else
+    {
+        IplImage ipl_frame = frame;
+        cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            ipl_frame = frame; // refresh the IplImage header for the newly read frame
+
+            startTimer(); next();
+            cvUpdateBGStatModel(&ipl_frame, model);
+            stopTimer();
+        }
+
+        const cv::Mat background = cv::cvarrToMat(model->background);
+        const cv::Mat foreground = cv::cvarrToMat(model->foreground);
+
+        CPU_SANITY_CHECK(background);
+        CPU_SANITY_CHECK(foreground);
+    }
+}
+
+#endif
+
+//////////////////////////////////////////////////////
+// MOG
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+DEF_PARAM_TEST(Video_Cn_LearningRate, string, MatCn, double);
+// Times one MOG update per frame over 10 frames, for each video / channel count / learning rate combination.
+PERF_TEST_P(Video_Cn_LearningRate, Video_MOG,
+            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+                    GPU_CHANNELS_1_3_4,
+                    Values(0.0, 0.01)))
+{
+    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
+    const float learningRate = static_cast<float>(GET_PARAM(2));
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    if (cn != 3) // the frame is treated as BGR: convert to gray for cn == 1, to BGRA otherwise
+    {
+        cv::Mat temp;
+        if (cn == 1)
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+        else
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+        cv::swap(temp, frame);
+    }
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat d_frame(frame);
+        cv::gpu::MOG_GPU d_mog;
+        cv::gpu::GpuMat foreground;
+
+        d_mog(d_frame, foreground, learningRate); // first call is made outside the timed loop
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            if (cn != 3)
+            {
+                cv::Mat temp;
+                if (cn == 1)
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+                else
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+                cv::swap(temp, frame);
+            }
+
+            d_frame.upload(frame);
+            // reading, colour conversion and upload are done before the timer starts
+            startTimer(); next();
+            d_mog(d_frame, foreground, learningRate);
+            stopTimer();
+        }
+
+        GPU_SANITY_CHECK(foreground);
+    }
+    else
+    {
+        cv::Ptr<cv::BackgroundSubtractor> mog = cv::createBackgroundSubtractorMOG();
+        cv::Mat foreground;
+
+        mog->apply(frame, foreground, learningRate); // first call is made outside the timed loop
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            if (cn != 3)
+            {
+                cv::Mat temp;
+                if (cn == 1)
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+                else
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+                cv::swap(temp, frame);
+            }
+
+            startTimer(); next();
+            mog->apply(frame, foreground, learningRate);
+            stopTimer();
+        }
+
+        CPU_SANITY_CHECK(foreground);
+    }
+}
+
+#endif
+
+//////////////////////////////////////////////////////
+// MOG2
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+DEF_PARAM_TEST(Video_Cn, string, int);
+// Times one MOG2 update per frame over 10 frames, with shadow detection switched off in both code paths.
+PERF_TEST_P(Video_Cn, Video_MOG2,
+            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+                    GPU_CHANNELS_1_3_4))
+{
+    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    if (cn != 3) // the frame is treated as BGR: convert to gray for cn == 1, to BGRA otherwise
+    {
+        cv::Mat temp;
+        if (cn == 1)
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+        else
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+        cv::swap(temp, frame);
+    }
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::MOG2_GPU d_mog2;
+        d_mog2.bShadowDetection = false;
+
+        cv::gpu::GpuMat d_frame(frame);
+        cv::gpu::GpuMat foreground;
+
+        d_mog2(d_frame, foreground); // first call is made outside the timed loop
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            if (cn != 3)
+            {
+                cv::Mat temp;
+                if (cn == 1)
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+                else
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+                cv::swap(temp, frame);
+            }
+
+            d_frame.upload(frame);
+            // reading, colour conversion and upload are done before the timer starts
+            startTimer(); next();
+            d_mog2(d_frame, foreground);
+            stopTimer();
+        }
+
+        GPU_SANITY_CHECK(foreground);
+    }
+    else
+    {
+        cv::Ptr<cv::BackgroundSubtractor> mog2 = cv::createBackgroundSubtractorMOG2();
+        mog2->set("detectShadows", false);
+
+        cv::Mat foreground;
+
+        mog2->apply(frame, foreground); // first call is made outside the timed loop
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            if (cn != 3)
+            {
+                cv::Mat temp;
+                if (cn == 1)
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+                else
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+                cv::swap(temp, frame);
+            }
+
+            startTimer(); next();
+            mog2->apply(frame, foreground);
+            stopTimer();
+        }
+
+        CPU_SANITY_CHECK(foreground);
+    }
+}
+
+#endif
+
+//////////////////////////////////////////////////////
+// MOG2GetBackgroundImage
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
+            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+                    GPU_CHANNELS_1_3_4))
+{
+    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    // The model is first fed 10 frames without timing; only getBackgroundImage() is measured afterwards.
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat d_frame;
+        cv::gpu::MOG2_GPU d_mog2;
+        cv::gpu::GpuMat d_foreground;
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            if (cn != 3) // the frame is treated as BGR: convert to gray for cn == 1, to BGRA otherwise
+            {
+                cv::Mat temp;
+                if (cn == 1)
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+                else
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+                cv::swap(temp, frame);
+            }
+
+            d_frame.upload(frame);
+
+            d_mog2(d_frame, d_foreground);
+        }
+
+        cv::gpu::GpuMat background;
+
+        TEST_CYCLE() d_mog2.getBackgroundImage(background);
+
+        GPU_SANITY_CHECK(background, 1);
+    }
+    else
+    {
+        cv::Ptr<cv::BackgroundSubtractor> mog2 = cv::createBackgroundSubtractorMOG2();
+        cv::Mat foreground;
+
+        for (int i = 0; i < 10; ++i)
+        {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+
+            if (cn != 3)
+            {
+                cv::Mat temp;
+                if (cn == 1)
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+                else
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+                cv::swap(temp, frame);
+            }
+
+            mog2->apply(frame, foreground);
+        }
+
+        cv::Mat background;
+
+        TEST_CYCLE() mog2->getBackgroundImage(background);
+
+        CPU_SANITY_CHECK(background);
+    }
+}
+
+#endif
+
+//////////////////////////////////////////////////////
+// GMG
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+DEF_PARAM_TEST(Video_Cn_MaxFeatures, string, MatCn, int);
+// Times one GMG update per frame over 150 frames; the video is reopened whenever no further frame can be read.
+PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG,
+            Combine(Values(string("gpu/video/768x576.avi")),
+                    GPU_CHANNELS_1_3_4,
+                    Values(20, 40, 60)))
+{
+    const std::string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
+    const int maxFeatures = GET_PARAM(2);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    if (cn != 3) // the frame is treated as BGR: convert to gray for cn == 1, to BGRA otherwise
+    {
+        cv::Mat temp;
+        if (cn == 1)
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+        else
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+        cv::swap(temp, frame);
+    }
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat d_frame(frame);
+        cv::gpu::GpuMat foreground;
+
+        cv::gpu::GMG_GPU d_gmg;
+        d_gmg.maxFeatures = maxFeatures;
+
+        d_gmg(d_frame, foreground); // first call is made outside the timed loop
+
+        for (int i = 0; i < 150; ++i)
+        {
+            cap >> frame;
+            if (frame.empty()) // no frame was read: reopen the file and read again
+            {
+                cap.release();
+                cap.open(inputFile);
+                cap >> frame;
+                ASSERT_FALSE(frame.empty()); // fail cleanly instead of passing an empty frame on
+            }
+
+            if (cn != 3)
+            {
+                cv::Mat temp;
+                if (cn == 1)
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+                else
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+                cv::swap(temp, frame);
+            }
+
+            d_frame.upload(frame);
+
+            startTimer(); next();
+            d_gmg(d_frame, foreground);
+            stopTimer();
+        }
+
+        GPU_SANITY_CHECK(foreground);
+    }
+    else
+    {
+        cv::Mat foreground;
+
+        cv::Ptr<cv::BackgroundSubtractor> gmg = cv::createBackgroundSubtractorGMG();
+        gmg->set("maxFeatures", maxFeatures);
+
+        gmg->apply(frame, foreground); // first call is made outside the timed loop
+
+        for (int i = 0; i < 150; ++i)
+        {
+            cap >> frame;
+            if (frame.empty()) // no frame was read: reopen the file and read again
+            {
+                cap.release();
+                cap.open(inputFile);
+                cap >> frame;
+                ASSERT_FALSE(frame.empty()); // fail cleanly instead of passing an empty frame on
+            }
+
+            if (cn != 3)
+            {
+                cv::Mat temp;
+                if (cn == 1)
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+                else
+                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+                cv::swap(temp, frame);
+            }
+
+            startTimer(); next();
+            gmg->apply(frame, foreground);
+            stopTimer();
+        }
+
+        CPU_SANITY_CHECK(foreground);
+    }
+}
+
+#endif
diff --git a/modules/gpuvideo/perf/perf_main.cpp b/modules/gpubgsegm/perf/perf_main.cpp
similarity index 100%
rename from modules/gpuvideo/perf/perf_main.cpp
rename to modules/gpubgsegm/perf/perf_main.cpp
diff --git a/modules/gpuvideo/perf/perf_precomp.cpp b/modules/gpubgsegm/perf/perf_precomp.cpp
similarity index 100%
rename from modules/gpuvideo/perf/perf_precomp.cpp
rename to modules/gpubgsegm/perf/perf_precomp.cpp
diff --git a/modules/gpuvideo/perf/perf_precomp.hpp b/modules/gpubgsegm/perf/perf_precomp.hpp
similarity index 97%
rename from modules/gpuvideo/perf/perf_precomp.hpp
rename to modules/gpubgsegm/perf/perf_precomp.hpp
index ecb314900..3343fe30a 100644
--- a/modules/gpuvideo/perf/perf_precomp.hpp
+++ b/modules/gpubgsegm/perf/perf_precomp.hpp
@@ -54,8 +54,7 @@
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_perf.hpp"
 
-#include "opencv2/gpuvideo.hpp"
-#include "opencv2/gpuimgproc.hpp"
+#include "opencv2/gpubgsegm.hpp"
 
 #include "opencv2/video.hpp"
 #include "opencv2/legacy.hpp"
diff --git a/modules/gpuvideo/src/bgfg_gmg.cpp b/modules/gpubgsegm/src/bgfg_gmg.cpp
similarity index 100%
rename from modules/gpuvideo/src/bgfg_gmg.cpp
rename to modules/gpubgsegm/src/bgfg_gmg.cpp
diff --git a/modules/gpuvideo/src/bgfg_mog.cpp b/modules/gpubgsegm/src/bgfg_mog.cpp
similarity index 100%
rename from modules/gpuvideo/src/bgfg_mog.cpp
rename to modules/gpubgsegm/src/bgfg_mog.cpp
diff --git a/modules/gpuvideo/src/cuda/bgfg_gmg.cu b/modules/gpubgsegm/src/cuda/bgfg_gmg.cu
similarity index 100%
rename from modules/gpuvideo/src/cuda/bgfg_gmg.cu
rename to modules/gpubgsegm/src/cuda/bgfg_gmg.cu
diff --git a/modules/gpuvideo/src/cuda/bgfg_mog.cu b/modules/gpubgsegm/src/cuda/bgfg_mog.cu
similarity index 100%
rename from modules/gpuvideo/src/cuda/bgfg_mog.cu
rename to modules/gpubgsegm/src/cuda/bgfg_mog.cu
diff --git a/modules/gpuvideo/src/cuda/fgd_bgfg.cu b/modules/gpubgsegm/src/cuda/fgd_bgfg.cu
similarity index 100%
rename from modules/gpuvideo/src/cuda/fgd_bgfg.cu
rename to modules/gpubgsegm/src/cuda/fgd_bgfg.cu
diff --git a/modules/gpuvideo/src/cuda/fgd_bgfg_common.hpp b/modules/gpubgsegm/src/cuda/fgd_bgfg_common.hpp
similarity index 100%
rename from modules/gpuvideo/src/cuda/fgd_bgfg_common.hpp
rename to modules/gpubgsegm/src/cuda/fgd_bgfg_common.hpp
diff --git a/modules/gpuvideo/src/fgd_bgfg.cpp b/modules/gpubgsegm/src/fgd_bgfg.cpp
similarity index 100%
rename from modules/gpuvideo/src/fgd_bgfg.cpp
rename to modules/gpubgsegm/src/fgd_bgfg.cpp
diff --git a/modules/gpuvideo/src/precomp.cpp b/modules/gpubgsegm/src/precomp.cpp
similarity index 100%
rename from modules/gpuvideo/src/precomp.cpp
rename to modules/gpubgsegm/src/precomp.cpp
diff --git a/modules/gpuvideo/src/precomp.hpp b/modules/gpubgsegm/src/precomp.hpp
similarity index 97%
rename from modules/gpuvideo/src/precomp.hpp
rename to modules/gpubgsegm/src/precomp.hpp
index a1e1b47d5..9f9dcb94c 100644
--- a/modules/gpuvideo/src/precomp.hpp
+++ b/modules/gpubgsegm/src/precomp.hpp
@@ -45,11 +45,10 @@
 
 #include <limits>
 
-#include "opencv2/gpuvideo.hpp"
+#include "opencv2/gpubgsegm.hpp"
 
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpufilters.hpp"
-#include "opencv2/gpuwarping.hpp"
 #include "opencv2/gpuimgproc.hpp"
 
 #include "opencv2/video.hpp"
diff --git a/modules/gpuvideo/test/test_bgfg.cpp b/modules/gpubgsegm/test/test_bgfg.cpp
similarity index 100%
rename from modules/gpuvideo/test/test_bgfg.cpp
rename to modules/gpubgsegm/test/test_bgfg.cpp
diff --git a/modules/gpuvideo/test/test_main.cpp b/modules/gpubgsegm/test/test_main.cpp
similarity index 100%
rename from modules/gpuvideo/test/test_main.cpp
rename to modules/gpubgsegm/test/test_main.cpp
diff --git a/modules/gpuvideo/test/test_precomp.cpp b/modules/gpubgsegm/test/test_precomp.cpp
similarity index 100%
rename from modules/gpuvideo/test/test_precomp.cpp
rename to modules/gpubgsegm/test/test_precomp.cpp
diff --git a/modules/gpubgsegm/test/test_precomp.hpp b/modules/gpubgsegm/test/test_precomp.hpp
new file mode 100644
index 000000000..3c6ad197b
--- /dev/null
+++ b/modules/gpubgsegm/test/test_precomp.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include <fstream>
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpubgsegm.hpp"
+
+#include "opencv2/video.hpp"
+#include "opencv2/legacy.hpp"
+
+#endif
diff --git a/modules/gpuimgproc/doc/image_processing.rst b/modules/gpuimgproc/doc/image_processing.rst
index fed3aaf25..352288672 100644
--- a/modules/gpuimgproc/doc/image_processing.rst
+++ b/modules/gpuimgproc/doc/image_processing.rst
@@ -556,3 +556,34 @@ Downloads results from :ocv:func:`gpu::HoughCircles` to host memory.
     :param h_circles: Output host array.
 
 .. seealso:: :ocv:func:`gpu::HoughCircles`
+
+
+
+gpu::GoodFeaturesToTrackDetector_GPU
+------------------------------------
+.. ocv:class:: gpu::GoodFeaturesToTrackDetector_GPU
+
+Class used for strong corners detection on an image. ::
+
+    class GoodFeaturesToTrackDetector_GPU
+    {
+    public:
+        explicit GoodFeaturesToTrackDetector_GPU(int maxCorners_ = 1000, double qualityLevel_ = 0.01, double minDistance_ = 0.0,
+            int blockSize_ = 3, bool useHarrisDetector_ = false, double harrisK_ = 0.04);
+
+        void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
+
+        int maxCorners;
+        double qualityLevel;
+        double minDistance;
+
+        int blockSize;
+        bool useHarrisDetector;
+        double harrisK;
+
+        void releaseMemory();
+    };
+
+The class finds the most prominent corners in the image.
+
+.. seealso:: :ocv:func:`goodFeaturesToTrack`
diff --git a/modules/gpuimgproc/perf/perf_imgproc.cpp b/modules/gpuimgproc/perf/perf_imgproc.cpp
index 73e298855..fcfafef5c 100644
--- a/modules/gpuimgproc/perf/perf_imgproc.cpp
+++ b/modules/gpuimgproc/perf/perf_imgproc.cpp
@@ -1092,3 +1092,42 @@ PERF_TEST_P(Sz_Depth_Cn_KernelSz, BilateralFilter,
         CPU_SANITY_CHECK(dst);
     }
 }
+
+//////////////////////////////////////////////////////
+// GoodFeaturesToTrack
+
+DEF_PARAM_TEST(Image_MinDistance, string, double);
+
+PERF_TEST_P(Image_MinDistance, GoodFeaturesToTrack,
+            Combine(Values<string>("gpu/perf/aloe.png"),
+                    Values(0.0, 3.0)))
+{
+    const string fileName = GET_PARAM(0);
+    const double minDistance = GET_PARAM(1);
+
+    const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    const int maxCorners = 8000;
+    const double qualityLevel = 0.01;
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance);
+
+        const cv::gpu::GpuMat d_image(image);
+        cv::gpu::GpuMat pts;
+
+        TEST_CYCLE() d_detector(d_image, pts);
+
+        GPU_SANITY_CHECK(pts);
+    }
+    else
+    {
+        cv::Mat pts;
+
+        TEST_CYCLE() cv::goodFeaturesToTrack(image, pts, maxCorners, qualityLevel, minDistance);
+
+        CPU_SANITY_CHECK(pts);
+    }
+}
diff --git a/modules/gpuoptflow/CMakeLists.txt b/modules/gpuoptflow/CMakeLists.txt
new file mode 100644
index 000000000..120262d25
--- /dev/null
+++ b/modules/gpuoptflow/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpuoptflow)
+endif()
+
+set(the_description "GPU-accelerated Optical Flow")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+
+ocv_define_module(gpuoptflow opencv_video opencv_legacy opencv_gpuarithm opencv_gpuwarping opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuoptflow/doc/gpuoptflow.rst b/modules/gpuoptflow/doc/gpuoptflow.rst
new file mode 100644
index 000000000..925f39338
--- /dev/null
+++ b/modules/gpuoptflow/doc/gpuoptflow.rst
@@ -0,0 +1,8 @@
+****************************************
+gpuoptflow. GPU-accelerated Optical Flow
+****************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    optflow
diff --git a/modules/gpuoptflow/doc/optflow.rst b/modules/gpuoptflow/doc/optflow.rst
new file mode 100644
index 000000000..d8f153610
--- /dev/null
+++ b/modules/gpuoptflow/doc/optflow.rst
@@ -0,0 +1,238 @@
+Video Analysis
+==============
+
+.. highlight:: cpp
+
+
+
+gpu::BroxOpticalFlow
+--------------------
+.. ocv:class:: gpu::BroxOpticalFlow
+
+Class computing the optical flow for two images using the Brox et al. optical flow algorithm ([Brox2004]_). ::
+
+    class BroxOpticalFlow
+    {
+    public:
+        BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_);
+
+        //! Compute optical flow
+        //! frame0 - source frame (supports only CV_32FC1 type)
+        //! frame1 - frame to track (with the same size and type as frame0)
+        //! u      - flow horizontal component (along x axis)
+        //! v      - flow vertical component (along y axis)
+        void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
+
+        //! flow smoothness
+        float alpha;
+
+        //! gradient constancy importance
+        float gamma;
+
+        //! pyramid scale factor
+        float scale_factor;
+
+        //! number of lagged non-linearity iterations (inner loop)
+        int inner_iterations;
+
+        //! number of warping iterations (number of pyramid levels)
+        int outer_iterations;
+
+        //! number of linear system solver iterations
+        int solver_iterations;
+
+        GpuMat buf;
+    };
+
+
+
+gpu::GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU
+---------------------------------------------------------------------
+Constructor.
+
+.. ocv:function:: gpu::GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04)
+
+    :param maxCorners: Maximum number of corners to return. If more corners are found than ``maxCorners``, only the strongest of them are returned.
+
+    :param qualityLevel: Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see  :ocv:func:`gpu::cornerMinEigenVal` ) or the Harris function response (see  :ocv:func:`gpu::cornerHarris` ). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the  ``qualityLevel=0.01`` , then all the corners with the quality measure less than 15 are rejected.
+
+    :param minDistance: Minimum possible Euclidean distance between the returned corners.
+
+    :param blockSize: Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See  :ocv:func:`cornerEigenValsAndVecs` .
+
+    :param useHarrisDetector: Parameter indicating whether to use a Harris detector (see :ocv:func:`gpu::cornerHarris`) or :ocv:func:`gpu::cornerMinEigenVal`.
+
+    :param harrisK: Free parameter of the Harris detector.
+
+
+gpu::FarnebackOpticalFlow
+-------------------------
+.. ocv:class:: gpu::FarnebackOpticalFlow
+
+Class computing a dense optical flow using Gunnar Farneback's algorithm. ::
+
+    class CV_EXPORTS FarnebackOpticalFlow
+    {
+    public:
+        FarnebackOpticalFlow()
+        {
+            numLevels = 5;
+            pyrScale = 0.5;
+            fastPyramids = false;
+            winSize = 13;
+            numIters = 10;
+            polyN = 5;
+            polySigma = 1.1;
+            flags = 0;
+        }
+
+        int numLevels;
+        double pyrScale;
+        bool fastPyramids;
+        int winSize;
+        int numIters;
+        int polyN;
+        double polySigma;
+        int flags;
+
+        void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
+
+        void releaseMemory();
+
+    private:
+        /* hidden */
+    };
+
+
+
+gpu::FarnebackOpticalFlow::operator ()
+--------------------------------------
+Computes a dense optical flow using Gunnar Farneback's algorithm.
+
+.. ocv:function:: void gpu::FarnebackOpticalFlow::operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null())
+
+    :param frame0: First 8-bit gray-scale input image
+    :param frame1: Second 8-bit gray-scale input image
+    :param flowx: Flow horizontal component
+    :param flowy: Flow vertical component
+    :param s: Stream
+
+.. seealso:: :ocv:func:`calcOpticalFlowFarneback`
+
+
+
+gpu::FarnebackOpticalFlow::releaseMemory
+----------------------------------------
+Releases unused auxiliary memory buffers.
+
+.. ocv:function:: void gpu::FarnebackOpticalFlow::releaseMemory()
+
+
+
+gpu::PyrLKOpticalFlow
+---------------------
+.. ocv:class:: gpu::PyrLKOpticalFlow
+
+Class used for calculating an optical flow. ::
+
+    class PyrLKOpticalFlow
+    {
+    public:
+        PyrLKOpticalFlow();
+
+        void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
+            GpuMat& status, GpuMat* err = 0);
+
+        void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
+
+        Size winSize;
+        int maxLevel;
+        int iters;
+        bool useInitialFlow;
+
+        void releaseMemory();
+    };
+
+The class can calculate an optical flow for a sparse feature set or dense optical flow using the iterative Lucas-Kanade method with pyramids.
+
+.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
+
+
+
+gpu::PyrLKOpticalFlow::sparse
+-----------------------------
+Calculate an optical flow for a sparse feature set.
+
+.. ocv:function:: void gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err = 0)
+
+    :param prevImg: First 8-bit input image (supports both grayscale and color images).
+
+    :param nextImg: Second input image of the same size and the same type as  ``prevImg`` .
+
+    :param prevPts: Vector of 2D points for which the flow needs to be found. It must be a one-row matrix of CV_32FC2 type.
+
+    :param nextPts: Output vector of 2D points (with single-precision floating-point coordinates) containing the calculated new positions of input features in the second image. When ``useInitialFlow`` is true, the vector must have the same size as in the input.
+
+    :param status: Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the flow for the corresponding features has been found. Otherwise, it is set to 0.
+
+    :param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
+
+.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
+
+
+
+gpu::PyrLKOpticalFlow::dense
+-----------------------------
+Calculate dense optical flow.
+
+.. ocv:function:: void gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0)
+
+    :param prevImg: First 8-bit grayscale input image.
+
+    :param nextImg: Second input image of the same size and the same type as  ``prevImg`` .
+
+    :param u: Horizontal component of the optical flow of the same size as input images, 32-bit floating-point, single-channel.
+
+    :param v: Vertical component of the optical flow of the same size as input images, 32-bit floating-point, single-channel.
+
+    :param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
+
+
+
+gpu::PyrLKOpticalFlow::releaseMemory
+------------------------------------
+Releases the memory of the internal buffers.
+
+.. ocv:function:: void gpu::PyrLKOpticalFlow::releaseMemory()
+
+
+
+gpu::interpolateFrames
+----------------------
+Interpolates frames (images) using provided optical flow (displacement field).
+
+.. ocv:function:: void gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv, float pos, GpuMat& newFrame, GpuMat& buf, Stream& stream = Stream::Null())
+
+    :param frame0: First frame (32-bit floating point images, single channel).
+
+    :param frame1: Second frame. Must have the same type and size as ``frame0`` .
+
+    :param fu: Forward horizontal displacement.
+
+    :param fv: Forward vertical displacement.
+
+    :param bu: Backward horizontal displacement.
+
+    :param bv: Backward vertical displacement.
+
+    :param pos: New frame position.
+
+    :param newFrame: Output image.
+
+    :param buf: Temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat: occlusion masks for first frame, occlusion masks for second, interpolated forward horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, interpolated backward vertical flow.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+.. [Brox2004] T. Brox, A. Bruhn, N. Papenberg, J. Weickert. *High accuracy optical flow estimation based on a theory for warping*. ECCV 2004.
diff --git a/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp b/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
new file mode 100644
index 000000000..4e245195d
--- /dev/null
+++ b/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
@@ -0,0 +1,310 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUOPTFLOW_HPP__
+#define __OPENCV_GPUOPTFLOW_HPP__
+
+#include "opencv2/core/gpumat.hpp"
+
+namespace cv { namespace gpu {
+
+////////////////////////////////// Optical Flow //////////////////////////////////////////
+
+class CV_EXPORTS BroxOpticalFlow
+{
+public:
+    BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
+        alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
+        inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
+    {
+    }
+
+    //! Compute optical flow
+    //! frame0 - source frame (supports only CV_32FC1 type)
+    //! frame1 - frame to track (with the same size and type as frame0)
+    //! u      - flow horizontal component (along x axis)
+    //! v      - flow vertical component (along y axis)
+    void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
+
+    //! flow smoothness
+    float alpha;
+
+    //! gradient constancy importance
+    float gamma;
+
+    //! pyramid scale factor
+    float scale_factor;
+
+    //! number of lagged non-linearity iterations (inner loop)
+    int inner_iterations;
+
+    //! number of warping iterations (number of pyramid levels)
+    int outer_iterations;
+
+    //! number of linear system solver iterations
+    int solver_iterations;
+
+    GpuMat buf;
+};
+
+class CV_EXPORTS PyrLKOpticalFlow
+{
+public:
+    PyrLKOpticalFlow();
+
+    void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
+        GpuMat& status, GpuMat* err = 0);
+
+    void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
+
+    void releaseMemory();
+
+    Size winSize;
+    int maxLevel;
+    int iters;
+    bool useInitialFlow;
+
+private:
+    std::vector<GpuMat> prevPyr_;
+    std::vector<GpuMat> nextPyr_;
+
+    GpuMat buf_;
+
+    GpuMat uPyr_[2];
+    GpuMat vPyr_[2];
+};
+
+class CV_EXPORTS FarnebackOpticalFlow
+{
+public:
+    FarnebackOpticalFlow()
+    {
+        numLevels = 5;
+        pyrScale = 0.5;
+        fastPyramids = false;
+        winSize = 13;
+        numIters = 10;
+        polyN = 5;
+        polySigma = 1.1;
+        flags = 0;
+    }
+
+    int numLevels;
+    double pyrScale;
+    bool fastPyramids;
+    int winSize;
+    int numIters;
+    int polyN;
+    double polySigma;
+    int flags;
+
+    void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
+
+    void releaseMemory()
+    {
+        frames_[0].release();
+        frames_[1].release();
+        pyrLevel_[0].release();
+        pyrLevel_[1].release();
+        M_.release();
+        bufM_.release();
+        R_[0].release();
+        R_[1].release();
+        blurredFrame_[0].release();
+        blurredFrame_[1].release();
+        pyramid0_.clear();
+        pyramid1_.clear();
+    }
+
+private:
+    void prepareGaussian(
+            int n, double sigma, float *g, float *xg, float *xxg,
+            double &ig11, double &ig03, double &ig33, double &ig55);
+
+    void setPolynomialExpansionConsts(int n, double sigma);
+
+    void updateFlow_boxFilter(
+            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
+            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
+
+    void updateFlow_gaussianBlur(
+            const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
+            GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
+
+    GpuMat frames_[2];
+    GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
+    std::vector<GpuMat> pyramid0_, pyramid1_;
+};
+
+// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
+//
+// see reference:
+//   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+//   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+class CV_EXPORTS OpticalFlowDual_TVL1_GPU
+{
+public:
+    OpticalFlowDual_TVL1_GPU();
+
+    void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
+
+    void collectGarbage();
+
+    /**
+     * Time step of the numerical scheme.
+     */
+    double tau;
+
+    /**
+     * Weight parameter for the data term, attachment parameter.
+     * This is the most relevant parameter, which determines the smoothness of the output.
+     * The smaller this parameter is, the smoother the solutions we obtain.
+     * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
+     */
+    double lambda;
+
+    /**
+     * Weight parameter for (u - v)^2, tightness parameter.
+     * It serves as a link between the attachment and the regularization terms.
+     * In theory, it should have a small value in order to maintain both parts in correspondence.
+     * The method is stable for a large range of values of this parameter.
+     */
+    double theta;
+
+    /**
+     * Number of scales used to create the pyramid of images.
+     */
+    int nscales;
+
+    /**
+     * Number of warpings per scale.
+     * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
+     * This is a parameter that assures the stability of the method.
+     * It also affects the running time, so it is a compromise between speed and accuracy.
+     */
+    int warps;
+
+    /**
+     * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
+     * A small value will yield more accurate solutions at the expense of a slower convergence.
+     */
+    double epsilon;
+
+    /**
+     * Stopping criterion iterations number used in the numerical scheme.
+     */
+    int iterations;
+
+    double scaleStep;
+
+    bool useInitialFlow;
+
+private:
+    void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2);
+
+    std::vector<GpuMat> I0s;
+    std::vector<GpuMat> I1s;
+    std::vector<GpuMat> u1s;
+    std::vector<GpuMat> u2s;
+
+    GpuMat I1x_buf;
+    GpuMat I1y_buf;
+
+    GpuMat I1w_buf;
+    GpuMat I1wx_buf;
+    GpuMat I1wy_buf;
+
+    GpuMat grad_buf;
+    GpuMat rho_c_buf;
+
+    GpuMat p11_buf;
+    GpuMat p12_buf;
+    GpuMat p21_buf;
+    GpuMat p22_buf;
+
+    GpuMat diff_buf;
+    GpuMat norm_buf;
+};
+
+//! Calculates optical flow for 2 images using block matching algorithm
+CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr,
+                                  Size block_size, Size shift_size, Size max_range, bool use_previous,
+                                  GpuMat& velx, GpuMat& vely, GpuMat& buf,
+                                  Stream& stream = Stream::Null());
+
+class CV_EXPORTS FastOpticalFlowBM
+{
+public:
+    void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null());
+
+private:
+    GpuMat buffer;
+    GpuMat extended_I0;
+    GpuMat extended_I1;
+};
+
+
+//! Interpolate frames (images) using provided optical flow (displacement field).
+//! frame0   - frame 0 (32-bit floating point images, single channel)
+//! frame1   - frame 1 (the same type and size)
+//! fu       - forward horizontal displacement
+//! fv       - forward vertical displacement
+//! bu       - backward horizontal displacement
+//! bv       - backward vertical displacement
+//! pos      - new frame position
+//! newFrame - new frame
+//! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat;
+//!            occlusion masks            0, occlusion masks            1,
+//!            interpolated forward flow  0, interpolated forward flow  1,
+//!            interpolated backward flow 0, interpolated backward flow 1
+//!
+CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
+                                  const GpuMat& fu, const GpuMat& fv,
+                                  const GpuMat& bu, const GpuMat& bv,
+                                  float pos, GpuMat& newFrame, GpuMat& buf,
+                                  Stream& stream = Stream::Null());
+
+CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUOPTFLOW_HPP__ */
diff --git a/modules/gpuoptflow/perf/perf_main.cpp b/modules/gpuoptflow/perf/perf_main.cpp
new file mode 100644
index 000000000..dad5e52bb
--- /dev/null
+++ b/modules/gpuoptflow/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
+CV_PERF_TEST_MAIN(gpuoptflow, printCudaInfo())
diff --git a/modules/gpuvideo/perf/perf_video.cpp b/modules/gpuoptflow/perf/perf_optflow.cpp
similarity index 53%
rename from modules/gpuvideo/perf/perf_video.cpp
rename to modules/gpuoptflow/perf/perf_optflow.cpp
index 59efd2e4a..febaee5f8 100644
--- a/modules/gpuvideo/perf/perf_video.cpp
+++ b/modules/gpuoptflow/perf/perf_optflow.cpp
@@ -47,26 +47,6 @@ using namespace std;
 using namespace testing;
 using namespace perf;
 
-#if defined(HAVE_XINE)         || \
-    defined(HAVE_GSTREAMER)    || \
-    defined(HAVE_QUICKTIME)    || \
-    defined(HAVE_AVFOUNDATION) || \
-    defined(HAVE_FFMPEG)       || \
-    defined(WIN32) /* assume that we have ffmpeg */
-
-#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
-#else
-#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
-#endif
-
-namespace cv
-{
-    template<> void Ptr<CvBGStatModel>::delete_obj()
-    {
-        cvReleaseBGStatModel(&obj);
-    }
-}
-
 //////////////////////////////////////////////////////
 // InterpolateFrames
 
@@ -152,45 +132,6 @@ PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
     }
 }
 
-//////////////////////////////////////////////////////
-// GoodFeaturesToTrack
-
-DEF_PARAM_TEST(Image_MinDistance, string, double);
-
-PERF_TEST_P(Image_MinDistance, Video_GoodFeaturesToTrack,
-            Combine(Values<string>("gpu/perf/aloe.png"),
-                    Values(0.0, 3.0)))
-{
-    const string fileName = GET_PARAM(0);
-    const double minDistance = GET_PARAM(1);
-
-    const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image.empty());
-
-    const int maxCorners = 8000;
-    const double qualityLevel = 0.01;
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance);
-
-        const cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat pts;
-
-        TEST_CYCLE() d_detector(d_image, pts);
-
-        GPU_SANITY_CHECK(pts);
-    }
-    else
-    {
-        cv::Mat pts;
-
-        TEST_CYCLE() cv::goodFeaturesToTrack(image, pts, maxCorners, qualityLevel, minDistance);
-
-        CPU_SANITY_CHECK(pts);
-    }
-}
-
 //////////////////////////////////////////////////////
 // BroxOpticalFlow
 
@@ -536,472 +477,3 @@ PERF_TEST_P(ImagePair, Video_FastOpticalFlowBM,
         FAIL_NO_CPU();
     }
 }
-
-//////////////////////////////////////////////////////
-// FGDStatModel
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-DEF_PARAM_TEST_1(Video, string);
-
-PERF_TEST_P(Video, Video_FGDStatModel,
-            Values(string("gpu/video/768x576.avi")))
-{
-    declare.time(60);
-
-    const string inputFile = perf::TestBase::getDataPath(GetParam());
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_frame(frame);
-
-        cv::gpu::FGDStatModel d_model(4);
-        d_model.create(d_frame);
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            d_frame.upload(frame);
-
-            startTimer(); next();
-            d_model.update(d_frame);
-            stopTimer();
-        }
-
-        const cv::gpu::GpuMat background = d_model.background;
-        const cv::gpu::GpuMat foreground = d_model.foreground;
-
-        GPU_SANITY_CHECK(background, 1e-2, ERROR_RELATIVE);
-        GPU_SANITY_CHECK(foreground, 1e-2, ERROR_RELATIVE);
-    }
-    else
-    {
-        IplImage ipl_frame = frame;
-        cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            ipl_frame = frame;
-
-            startTimer(); next();
-            cvUpdateBGStatModel(&ipl_frame, model);
-            stopTimer();
-        }
-
-        const cv::Mat background = cv::cvarrToMat(model->background);
-        const cv::Mat foreground = cv::cvarrToMat(model->foreground);
-
-        CPU_SANITY_CHECK(background);
-        CPU_SANITY_CHECK(foreground);
-    }
-}
-
-#endif
-
-//////////////////////////////////////////////////////
-// MOG
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-DEF_PARAM_TEST(Video_Cn_LearningRate, string, MatCn, double);
-
-PERF_TEST_P(Video_Cn_LearningRate, Video_MOG,
-            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
-                    GPU_CHANNELS_1_3_4,
-                    Values(0.0, 0.01)))
-{
-    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    const int cn = GET_PARAM(1);
-    const float learningRate = static_cast<float>(GET_PARAM(2));
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    if (cn != 3)
-    {
-        cv::Mat temp;
-        if (cn == 1)
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-        else
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-        cv::swap(temp, frame);
-    }
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::MOG_GPU d_mog;
-        cv::gpu::GpuMat foreground;
-
-        d_mog(d_frame, foreground, learningRate);
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            d_frame.upload(frame);
-
-            startTimer(); next();
-            d_mog(d_frame, foreground, learningRate);
-            stopTimer();
-        }
-
-        GPU_SANITY_CHECK(foreground);
-    }
-    else
-    {
-        cv::Ptr<cv::BackgroundSubtractor> mog = cv::createBackgroundSubtractorMOG();
-        cv::Mat foreground;
-
-        mog->apply(frame, foreground, learningRate);
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            startTimer(); next();
-            mog->apply(frame, foreground, learningRate);
-            stopTimer();
-        }
-
-        CPU_SANITY_CHECK(foreground);
-    }
-}
-
-#endif
-
-//////////////////////////////////////////////////////
-// MOG2
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-DEF_PARAM_TEST(Video_Cn, string, int);
-
-PERF_TEST_P(Video_Cn, Video_MOG2,
-            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
-                    GPU_CHANNELS_1_3_4))
-{
-    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    const int cn = GET_PARAM(1);
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    if (cn != 3)
-    {
-        cv::Mat temp;
-        if (cn == 1)
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-        else
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-        cv::swap(temp, frame);
-    }
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::MOG2_GPU d_mog2;
-        d_mog2.bShadowDetection = false;
-
-        cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::GpuMat foreground;
-
-        d_mog2(d_frame, foreground);
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            d_frame.upload(frame);
-
-            startTimer(); next();
-            d_mog2(d_frame, foreground);
-            stopTimer();
-        }
-
-        GPU_SANITY_CHECK(foreground);
-    }
-    else
-    {
-        cv::Ptr<cv::BackgroundSubtractor> mog2 = cv::createBackgroundSubtractorMOG2();
-        mog2->set("detectShadows", false);
-
-        cv::Mat foreground;
-
-        mog2->apply(frame, foreground);
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            startTimer(); next();
-            mog2->apply(frame, foreground);
-            stopTimer();
-        }
-
-        CPU_SANITY_CHECK(foreground);
-    }
-}
-
-#endif
-
-//////////////////////////////////////////////////////
-// MOG2GetBackgroundImage
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
-            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
-                    GPU_CHANNELS_1_3_4))
-{
-    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    const int cn = GET_PARAM(1);
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_frame;
-        cv::gpu::MOG2_GPU d_mog2;
-        cv::gpu::GpuMat d_foreground;
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            d_frame.upload(frame);
-
-            d_mog2(d_frame, d_foreground);
-        }
-
-        cv::gpu::GpuMat background;
-
-        TEST_CYCLE() d_mog2.getBackgroundImage(background);
-
-        GPU_SANITY_CHECK(background, 1);
-    }
-    else
-    {
-        cv::Ptr<cv::BackgroundSubtractor> mog2 = cv::createBackgroundSubtractorMOG2();
-        cv::Mat foreground;
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            mog2->apply(frame, foreground);
-        }
-
-        cv::Mat background;
-
-        TEST_CYCLE() mog2->getBackgroundImage(background);
-
-        CPU_SANITY_CHECK(background);
-    }
-}
-
-#endif
-
-//////////////////////////////////////////////////////
-// GMG
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-DEF_PARAM_TEST(Video_Cn_MaxFeatures, string, MatCn, int);
-
-PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG,
-            Combine(Values(string("gpu/video/768x576.avi")),
-                    GPU_CHANNELS_1_3_4,
-                    Values(20, 40, 60)))
-{
-    const std::string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    const int cn = GET_PARAM(1);
-    const int maxFeatures = GET_PARAM(2);
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    if (cn != 3)
-    {
-        cv::Mat temp;
-        if (cn == 1)
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-        else
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-        cv::swap(temp, frame);
-    }
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::GpuMat foreground;
-
-        cv::gpu::GMG_GPU d_gmg;
-        d_gmg.maxFeatures = maxFeatures;
-
-        d_gmg(d_frame, foreground);
-
-        for (int i = 0; i < 150; ++i)
-        {
-            cap >> frame;
-            if (frame.empty())
-            {
-                cap.release();
-                cap.open(inputFile);
-                cap >> frame;
-            }
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            d_frame.upload(frame);
-
-            startTimer(); next();
-            d_gmg(d_frame, foreground);
-            stopTimer();
-        }
-
-        GPU_SANITY_CHECK(foreground);
-    }
-    else
-    {
-        cv::Mat foreground;
-        cv::Mat zeros(frame.size(), CV_8UC1, cv::Scalar::all(0));
-
-        cv::Ptr<cv::BackgroundSubtractor> gmg = cv::createBackgroundSubtractorGMG();
-        gmg->set("maxFeatures", maxFeatures);
-        //gmg.initialize(frame.size(), 0.0, 255.0);
-
-        gmg->apply(frame, foreground);
-
-        for (int i = 0; i < 150; ++i)
-        {
-            cap >> frame;
-            if (frame.empty())
-            {
-                cap.release();
-                cap.open(inputFile);
-                cap >> frame;
-            }
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            startTimer(); next();
-            gmg->apply(frame, foreground);
-            stopTimer();
-        }
-
-        CPU_SANITY_CHECK(foreground);
-    }
-}
-
-#endif
diff --git a/modules/gpuoptflow/perf/perf_precomp.cpp b/modules/gpuoptflow/perf/perf_precomp.cpp
new file mode 100644
index 000000000..81f16e8f1
--- /dev/null
+++ b/modules/gpuoptflow/perf/perf_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
diff --git a/modules/gpuoptflow/perf/perf_precomp.hpp b/modules/gpuoptflow/perf/perf_precomp.hpp
new file mode 100644
index 000000000..cdc671b79
--- /dev/null
+++ b/modules/gpuoptflow/perf/perf_precomp.hpp
@@ -0,0 +1,66 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpuoptflow.hpp"
+
+#include "opencv2/video.hpp"
+#include "opencv2/legacy.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpuvideo/src/cuda/optflowbm.cu b/modules/gpuoptflow/src/cuda/optflowbm.cu
similarity index 100%
rename from modules/gpuvideo/src/cuda/optflowbm.cu
rename to modules/gpuoptflow/src/cuda/optflowbm.cu
diff --git a/modules/gpuvideo/src/cuda/optical_flow.cu b/modules/gpuoptflow/src/cuda/optical_flow.cu
similarity index 100%
rename from modules/gpuvideo/src/cuda/optical_flow.cu
rename to modules/gpuoptflow/src/cuda/optical_flow.cu
diff --git a/modules/gpuvideo/src/cuda/optical_flow_farneback.cu b/modules/gpuoptflow/src/cuda/optical_flow_farneback.cu
similarity index 100%
rename from modules/gpuvideo/src/cuda/optical_flow_farneback.cu
rename to modules/gpuoptflow/src/cuda/optical_flow_farneback.cu
diff --git a/modules/gpuvideo/src/cuda/pyrlk.cu b/modules/gpuoptflow/src/cuda/pyrlk.cu
similarity index 100%
rename from modules/gpuvideo/src/cuda/pyrlk.cu
rename to modules/gpuoptflow/src/cuda/pyrlk.cu
diff --git a/modules/gpuvideo/src/cuda/tvl1flow.cu b/modules/gpuoptflow/src/cuda/tvl1flow.cu
similarity index 100%
rename from modules/gpuvideo/src/cuda/tvl1flow.cu
rename to modules/gpuoptflow/src/cuda/tvl1flow.cu
diff --git a/modules/gpuvideo/src/optflowbm.cpp b/modules/gpuoptflow/src/optflowbm.cpp
similarity index 100%
rename from modules/gpuvideo/src/optflowbm.cpp
rename to modules/gpuoptflow/src/optflowbm.cpp
diff --git a/modules/gpuvideo/src/optical_flow.cpp b/modules/gpuoptflow/src/optical_flow.cpp
similarity index 100%
rename from modules/gpuvideo/src/optical_flow.cpp
rename to modules/gpuoptflow/src/optical_flow.cpp
diff --git a/modules/gpuvideo/src/optical_flow_farneback.cpp b/modules/gpuoptflow/src/optical_flow_farneback.cpp
similarity index 100%
rename from modules/gpuvideo/src/optical_flow_farneback.cpp
rename to modules/gpuoptflow/src/optical_flow_farneback.cpp
diff --git a/modules/gpuoptflow/src/precomp.cpp b/modules/gpuoptflow/src/precomp.cpp
new file mode 100644
index 000000000..3c01a2596
--- /dev/null
+++ b/modules/gpuoptflow/src/precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
diff --git a/modules/gpuoptflow/src/precomp.hpp b/modules/gpuoptflow/src/precomp.hpp
new file mode 100644
index 000000000..2bf47b228
--- /dev/null
+++ b/modules/gpuoptflow/src/precomp.hpp
@@ -0,0 +1,62 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <limits>
+
+#include "opencv2/gpuoptflow.hpp"
+#include "opencv2/gpuarithm.hpp"
+#include "opencv2/gpuwarping.hpp"
+
+#include "opencv2/video.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPULEGACY
+#  include "opencv2/gpulegacy/private.hpp"
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuvideo/src/pyrlk.cpp b/modules/gpuoptflow/src/pyrlk.cpp
similarity index 100%
rename from modules/gpuvideo/src/pyrlk.cpp
rename to modules/gpuoptflow/src/pyrlk.cpp
diff --git a/modules/gpuvideo/src/tvl1flow.cpp b/modules/gpuoptflow/src/tvl1flow.cpp
similarity index 100%
rename from modules/gpuvideo/src/tvl1flow.cpp
rename to modules/gpuoptflow/src/tvl1flow.cpp
diff --git a/modules/gpuoptflow/test/test_main.cpp b/modules/gpuoptflow/test/test_main.cpp
new file mode 100644
index 000000000..eea3d7c00
--- /dev/null
+++ b/modules/gpuoptflow/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_GPU_TEST_MAIN("gpu")
diff --git a/modules/gpuvideo/test/test_optflow.cpp b/modules/gpuoptflow/test/test_optflow.cpp
similarity index 89%
rename from modules/gpuvideo/test/test_optflow.cpp
rename to modules/gpuoptflow/test/test_optflow.cpp
index 34a08f2cf..893ab89dd 100644
--- a/modules/gpuvideo/test/test_optflow.cpp
+++ b/modules/gpuoptflow/test/test_optflow.cpp
@@ -151,88 +151,6 @@ GPU_TEST_P(BroxOpticalFlow, OpticalFlowNan)
 
 INSTANTIATE_TEST_CASE_P(GPU_Video, BroxOpticalFlow, ALL_DEVICES);
 
-//////////////////////////////////////////////////////
-// GoodFeaturesToTrack
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(MinDistance, double)
-}
-
-PARAM_TEST_CASE(GoodFeaturesToTrack, cv::gpu::DeviceInfo, MinDistance)
-{
-    cv::gpu::DeviceInfo devInfo;
-    double minDistance;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        minDistance = GET_PARAM(1);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(GoodFeaturesToTrack, Accuracy)
-{
-    cv::Mat image = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image.empty());
-
-    int maxCorners = 1000;
-    double qualityLevel = 0.01;
-
-    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(maxCorners, qualityLevel, minDistance);
-
-    cv::gpu::GpuMat d_pts;
-    detector(loadMat(image), d_pts);
-
-    ASSERT_FALSE(d_pts.empty());
-
-    std::vector<cv::Point2f> pts(d_pts.cols);
-    cv::Mat pts_mat(1, d_pts.cols, CV_32FC2, (void*) &pts[0]);
-    d_pts.download(pts_mat);
-
-    std::vector<cv::Point2f> pts_gold;
-    cv::goodFeaturesToTrack(image, pts_gold, maxCorners, qualityLevel, minDistance);
-
-    ASSERT_EQ(pts_gold.size(), pts.size());
-
-    size_t mistmatch = 0;
-    for (size_t i = 0; i < pts.size(); ++i)
-    {
-        cv::Point2i a = pts_gold[i];
-        cv::Point2i b = pts[i];
-
-        bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
-
-        if (!eq)
-            ++mistmatch;
-    }
-
-    double bad_ratio = static_cast<double>(mistmatch) / pts.size();
-
-    ASSERT_LE(bad_ratio, 0.01);
-}
-
-GPU_TEST_P(GoodFeaturesToTrack, EmptyCorners)
-{
-    int maxCorners = 1000;
-    double qualityLevel = 0.01;
-
-    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(maxCorners, qualityLevel, minDistance);
-
-    cv::gpu::GpuMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
-    cv::gpu::GpuMat corners(1, maxCorners, CV_32FC2);
-
-    detector(src, corners);
-
-    ASSERT_TRUE(corners.empty());
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Video, GoodFeaturesToTrack, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(MinDistance(0.0), MinDistance(3.0))));
-
 //////////////////////////////////////////////////////
 // PyrLKOpticalFlow
 
diff --git a/modules/gpuoptflow/test/test_precomp.cpp b/modules/gpuoptflow/test/test_precomp.cpp
new file mode 100644
index 000000000..0fb652180
--- /dev/null
+++ b/modules/gpuoptflow/test/test_precomp.cpp
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
diff --git a/modules/gpuvideo/test/test_precomp.hpp b/modules/gpuoptflow/test/test_precomp.hpp
similarity index 98%
rename from modules/gpuvideo/test/test_precomp.hpp
rename to modules/gpuoptflow/test/test_precomp.hpp
index b2b141aa4..4f993dd8b 100644
--- a/modules/gpuvideo/test/test_precomp.hpp
+++ b/modules/gpuoptflow/test/test_precomp.hpp
@@ -56,9 +56,8 @@
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_test.hpp"
 
-#include "opencv2/gpuvideo.hpp"
+#include "opencv2/gpuoptflow.hpp"
 #include "opencv2/gpuimgproc.hpp"
-
 #include "opencv2/video.hpp"
 #include "opencv2/legacy.hpp"
 
diff --git a/modules/gpuvideo/CMakeLists.txt b/modules/gpuvideo/CMakeLists.txt
deleted file mode 100644
index 6c15bd147..000000000
--- a/modules/gpuvideo/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-if(ANDROID OR IOS)
-  ocv_module_disable(gpuvideo)
-endif()
-
-set(the_description "GPU-accelerated Video Analysis")
-
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
-
-ocv_define_module(gpuvideo opencv_video opencv_legacy opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuvideo/doc/gpuvideo.rst b/modules/gpuvideo/doc/gpuvideo.rst
deleted file mode 100644
index e16d7c427..000000000
--- a/modules/gpuvideo/doc/gpuvideo.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-***********************************
-gpu. GPU-accelerated Video Analysis
-***********************************
-
-.. toctree::
-    :maxdepth: 1
-
-    video
diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index 84913d284..97884825b 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -22,7 +22,8 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuwarping/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuvideo/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuoptflow/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpubgsegm/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpustereo/include")
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
   endif()
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 7fed87be0..1c6f6a963 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -3,7 +3,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
                                      opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
-                                     opencv_gpufeatures2d opencv_gpuvideo
+                                     opencv_gpufeatures2d opencv_gpuoptflow opencv_gpubgsegm
                                      opencv_gpustereo opencv_gpulegacy)
 
 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})

From d5ee931287762abaf74d1d153a87fce5d9ab7271 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 17 Apr 2013 18:22:29 +0400
Subject: [PATCH 32/49] fixed compilation issues

---
 modules/gpubgsegm/src/precomp.hpp | 2 +-
 modules/photo/CMakeLists.txt      | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/modules/gpubgsegm/src/precomp.hpp b/modules/gpubgsegm/src/precomp.hpp
index 9f9dcb94c..4a0b01267 100644
--- a/modules/gpubgsegm/src/precomp.hpp
+++ b/modules/gpubgsegm/src/precomp.hpp
@@ -50,7 +50,7 @@
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpufilters.hpp"
 #include "opencv2/gpuimgproc.hpp"
-
+#include "opencv2/imgproc/imgproc_c.h"
 #include "opencv2/video.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
diff --git a/modules/photo/CMakeLists.txt b/modules/photo/CMakeLists.txt
index d36651201..b3e0e18dc 100644
--- a/modules/photo/CMakeLists.txt
+++ b/modules/photo/CMakeLists.txt
@@ -1,2 +1,7 @@
 set(the_description "Computational Photography")
+
+if(HAVE_CUDA)
+  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+endif()
+
 ocv_define_module(photo opencv_imgproc OPTIONAL opencv_gpuimgproc)

From 43d5e2d8b4ea4ff9bf0c83f874a2ac8a79f6ff5e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 11 Apr 2013 14:53:35 +0400
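Subject: [PATCH 33/49] removed gpu BORDER_* constants

Remove the internal BORDER_*_GPU enum values and the
tryConvertToGpuBorderType() helper. Host code now passes the caller's
BORDER_* value straight through to the CUDA callers, whose dispatch tables
are reordered to Constant, Replicate, Reflect, Wrap, Reflect101 so that they
can be indexed by that value directly.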
---
 .../core/include/opencv2/core/cuda/common.hpp |   9 --
 .../core/include/opencv2/core/gpu_private.hpp |   4 -
 modules/core/src/gpumat.cpp                   |  30 ----
 modules/gpuarithm/src/arithm.cpp              |   9 +-
 .../gpuarithm/src/cuda/copy_make_border.cu    |   6 +-
 modules/gpufilters/src/cuda/column_filter.hpp | 134 +++++++++---------
 modules/gpufilters/src/cuda/filter2d.cu       |   6 +-
 modules/gpufilters/src/cuda/row_filter.hpp    | 134 +++++++++---------
 modules/gpufilters/src/filtering.cpp          |  15 +-
 modules/gpuimgproc/src/bilateral_filter.cpp   |   5 +-
 .../gpuimgproc/src/cuda/bilateral_filter.cu   |   4 +-
 modules/gpuimgproc/src/cuda/imgproc.cu        |  12 +-
 modules/gpuimgproc/src/imgproc.cpp            |  10 +-
 .../src/cuda/optical_flow_farneback.cu        |  15 +-
 .../gpuoptflow/src/optical_flow_farneback.cpp |   6 +-
 modules/gpuwarping/src/cuda/remap.cu          |  18 +--
 modules/gpuwarping/src/cuda/warp.cu           |  18 +--
 modules/gpuwarping/src/remap.cpp              |   5 +-
 modules/gpuwarping/src/warp.cpp               |  10 +-
 modules/photo/src/cuda/nlm.cu                 |   4 +-
 modules/photo/src/denoising_gpu.cpp           |   5 +-
 21 files changed, 196 insertions(+), 263 deletions(-)

diff --git a/modules/core/include/opencv2/core/cuda/common.hpp b/modules/core/include/opencv2/core/cuda/common.hpp
index 774500e64..434a3eba1 100644
--- a/modules/core/include/opencv2/core/cuda/common.hpp
+++ b/modules/core/include/opencv2/core/cuda/common.hpp
@@ -87,15 +87,6 @@ namespace cv { namespace gpu
 
 namespace cv { namespace gpu
 {
-    enum
-    {
-        BORDER_REFLECT101_GPU = 0,
-        BORDER_REPLICATE_GPU,
-        BORDER_CONSTANT_GPU,
-        BORDER_REFLECT_GPU,
-        BORDER_WRAP_GPU
-    };
-
     namespace cudev
     {
         __host__ __device__ __forceinline__ int divUp(int total, int grain)
diff --git a/modules/core/include/opencv2/core/gpu_private.hpp b/modules/core/include/opencv2/core/gpu_private.hpp
index be194f54e..7692bc20e 100644
--- a/modules/core/include/opencv2/core/gpu_private.hpp
+++ b/modules/core/include/opencv2/core/gpu_private.hpp
@@ -74,10 +74,6 @@
 namespace cv { namespace gpu {
     CV_EXPORTS cv::String getNppErrorMessage(int code);
     CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
-
-    // Converts CPU border extrapolation mode into GPU internal analogue.
-    // Returns true if the GPU analogue exists, false otherwise.
-    CV_EXPORTS bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
 }}
 
 #ifndef HAVE_CUDA
diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp
index c5d4d7a4a..11bb41948 100644
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@@ -1678,33 +1678,3 @@ String cv::gpu::getCudaDriverApiErrorMessage(int code)
     return getErrorString(code, cu_errors, cu_errors_num);
 #endif
 }
-
-bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
-{
-#ifndef HAVE_CUDA
-    (void) cpuBorderType;
-    (void) gpuBorderType;
-    return false;
-#else
-    switch (cpuBorderType)
-    {
-    case IPL_BORDER_REFLECT_101:
-        gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;
-        return true;
-    case IPL_BORDER_REPLICATE:
-        gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
-        return true;
-    case IPL_BORDER_CONSTANT:
-        gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
-        return true;
-    case IPL_BORDER_REFLECT:
-        gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;
-        return true;
-    case IPL_BORDER_WRAP:
-        gpuBorderType = cv::gpu::BORDER_WRAP_GPU;
-        return true;
-    default:
-        return false;
-    };
-#endif
-}
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index cc85cc72a..908d96341 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -696,13 +696,13 @@ typedef Npp32s Npp32s_a;
 void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)
 {
     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    CV_Assert(borderType == IPL_BORDER_REFLECT_101 || borderType == IPL_BORDER_REPLICATE || borderType == IPL_BORDER_CONSTANT || borderType == IPL_BORDER_REFLECT || borderType == IPL_BORDER_WRAP);
+    CV_Assert(borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
 
     dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
 
     cudaStream_t stream = StreamAccessor::getStream(s);
 
-    if (borderType == IPL_BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
+    if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
     {
         NppiSize srcsz;
         srcsz.width  = src.cols;
@@ -766,10 +766,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
         caller_t func = callers[src.depth()][src.channels() - 1];
         CV_Assert(func != 0);
 
-        int gpuBorderType;
-        CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
-
-        func(src, dst, top, left, gpuBorderType, value, stream);
+        func(src, dst, top, left, borderType, value, stream);
     }
 }
 
diff --git a/modules/gpuarithm/src/cuda/copy_make_border.cu b/modules/gpuarithm/src/cuda/copy_make_border.cu
index ed90e9e80..d772e09ed 100644
--- a/modules/gpuarithm/src/cuda/copy_make_border.cu
+++ b/modules/gpuarithm/src/cuda/copy_make_border.cu
@@ -86,11 +86,11 @@ namespace cv { namespace gpu { namespace cudev
 
             static const caller_t callers[5] =
             {
-                CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
-                CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
                 CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
+                CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
                 CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
-                CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
+                CopyMakeBorderDispatcher<BrdWrap, vec_type>::call,
+                CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call
             };
 
             callers[borderMode](PtrStepSz<vec_type>(src), PtrStepSz<vec_type>(dst), top, left, borderValue, stream);
diff --git a/modules/gpufilters/src/cuda/column_filter.hpp b/modules/gpufilters/src/cuda/column_filter.hpp
index 39b6d4762..6f10c36f5 100644
--- a/modules/gpufilters/src/cuda/column_filter.hpp
+++ b/modules/gpufilters/src/cuda/column_filter.hpp
@@ -187,38 +187,38 @@ namespace filter
         {
             {
                 0,
-                column_filter::caller< 1, T, D, BrdColReflect101>,
-                column_filter::caller< 2, T, D, BrdColReflect101>,
-                column_filter::caller< 3, T, D, BrdColReflect101>,
-                column_filter::caller< 4, T, D, BrdColReflect101>,
-                column_filter::caller< 5, T, D, BrdColReflect101>,
-                column_filter::caller< 6, T, D, BrdColReflect101>,
-                column_filter::caller< 7, T, D, BrdColReflect101>,
-                column_filter::caller< 8, T, D, BrdColReflect101>,
-                column_filter::caller< 9, T, D, BrdColReflect101>,
-                column_filter::caller<10, T, D, BrdColReflect101>,
-                column_filter::caller<11, T, D, BrdColReflect101>,
-                column_filter::caller<12, T, D, BrdColReflect101>,
-                column_filter::caller<13, T, D, BrdColReflect101>,
-                column_filter::caller<14, T, D, BrdColReflect101>,
-                column_filter::caller<15, T, D, BrdColReflect101>,
-                column_filter::caller<16, T, D, BrdColReflect101>,
-                column_filter::caller<17, T, D, BrdColReflect101>,
-                column_filter::caller<18, T, D, BrdColReflect101>,
-                column_filter::caller<19, T, D, BrdColReflect101>,
-                column_filter::caller<20, T, D, BrdColReflect101>,
-                column_filter::caller<21, T, D, BrdColReflect101>,
-                column_filter::caller<22, T, D, BrdColReflect101>,
-                column_filter::caller<23, T, D, BrdColReflect101>,
-                column_filter::caller<24, T, D, BrdColReflect101>,
-                column_filter::caller<25, T, D, BrdColReflect101>,
-                column_filter::caller<26, T, D, BrdColReflect101>,
-                column_filter::caller<27, T, D, BrdColReflect101>,
-                column_filter::caller<28, T, D, BrdColReflect101>,
-                column_filter::caller<29, T, D, BrdColReflect101>,
-                column_filter::caller<30, T, D, BrdColReflect101>,
-                column_filter::caller<31, T, D, BrdColReflect101>,
-                column_filter::caller<32, T, D, BrdColReflect101>
+                column_filter::caller< 1, T, D, BrdColConstant>,
+                column_filter::caller< 2, T, D, BrdColConstant>,
+                column_filter::caller< 3, T, D, BrdColConstant>,
+                column_filter::caller< 4, T, D, BrdColConstant>,
+                column_filter::caller< 5, T, D, BrdColConstant>,
+                column_filter::caller< 6, T, D, BrdColConstant>,
+                column_filter::caller< 7, T, D, BrdColConstant>,
+                column_filter::caller< 8, T, D, BrdColConstant>,
+                column_filter::caller< 9, T, D, BrdColConstant>,
+                column_filter::caller<10, T, D, BrdColConstant>,
+                column_filter::caller<11, T, D, BrdColConstant>,
+                column_filter::caller<12, T, D, BrdColConstant>,
+                column_filter::caller<13, T, D, BrdColConstant>,
+                column_filter::caller<14, T, D, BrdColConstant>,
+                column_filter::caller<15, T, D, BrdColConstant>,
+                column_filter::caller<16, T, D, BrdColConstant>,
+                column_filter::caller<17, T, D, BrdColConstant>,
+                column_filter::caller<18, T, D, BrdColConstant>,
+                column_filter::caller<19, T, D, BrdColConstant>,
+                column_filter::caller<20, T, D, BrdColConstant>,
+                column_filter::caller<21, T, D, BrdColConstant>,
+                column_filter::caller<22, T, D, BrdColConstant>,
+                column_filter::caller<23, T, D, BrdColConstant>,
+                column_filter::caller<24, T, D, BrdColConstant>,
+                column_filter::caller<25, T, D, BrdColConstant>,
+                column_filter::caller<26, T, D, BrdColConstant>,
+                column_filter::caller<27, T, D, BrdColConstant>,
+                column_filter::caller<28, T, D, BrdColConstant>,
+                column_filter::caller<29, T, D, BrdColConstant>,
+                column_filter::caller<30, T, D, BrdColConstant>,
+                column_filter::caller<31, T, D, BrdColConstant>,
+                column_filter::caller<32, T, D, BrdColConstant>
             },
             {
                 0,
@@ -255,41 +255,6 @@ namespace filter
                 column_filter::caller<31, T, D, BrdColReplicate>,
                 column_filter::caller<32, T, D, BrdColReplicate>
             },
-            {
-                0,
-                column_filter::caller< 1, T, D, BrdColConstant>,
-                column_filter::caller< 2, T, D, BrdColConstant>,
-                column_filter::caller< 3, T, D, BrdColConstant>,
-                column_filter::caller< 4, T, D, BrdColConstant>,
-                column_filter::caller< 5, T, D, BrdColConstant>,
-                column_filter::caller< 6, T, D, BrdColConstant>,
-                column_filter::caller< 7, T, D, BrdColConstant>,
-                column_filter::caller< 8, T, D, BrdColConstant>,
-                column_filter::caller< 9, T, D, BrdColConstant>,
-                column_filter::caller<10, T, D, BrdColConstant>,
-                column_filter::caller<11, T, D, BrdColConstant>,
-                column_filter::caller<12, T, D, BrdColConstant>,
-                column_filter::caller<13, T, D, BrdColConstant>,
-                column_filter::caller<14, T, D, BrdColConstant>,
-                column_filter::caller<15, T, D, BrdColConstant>,
-                column_filter::caller<16, T, D, BrdColConstant>,
-                column_filter::caller<17, T, D, BrdColConstant>,
-                column_filter::caller<18, T, D, BrdColConstant>,
-                column_filter::caller<19, T, D, BrdColConstant>,
-                column_filter::caller<20, T, D, BrdColConstant>,
-                column_filter::caller<21, T, D, BrdColConstant>,
-                column_filter::caller<22, T, D, BrdColConstant>,
-                column_filter::caller<23, T, D, BrdColConstant>,
-                column_filter::caller<24, T, D, BrdColConstant>,
-                column_filter::caller<25, T, D, BrdColConstant>,
-                column_filter::caller<26, T, D, BrdColConstant>,
-                column_filter::caller<27, T, D, BrdColConstant>,
-                column_filter::caller<28, T, D, BrdColConstant>,
-                column_filter::caller<29, T, D, BrdColConstant>,
-                column_filter::caller<30, T, D, BrdColConstant>,
-                column_filter::caller<31, T, D, BrdColConstant>,
-                column_filter::caller<32, T, D, BrdColConstant>
-            },
             {
                 0,
                 column_filter::caller< 1, T, D, BrdColReflect>,
@@ -359,6 +324,41 @@ namespace filter
                 column_filter::caller<30, T, D, BrdColWrap>,
                 column_filter::caller<31, T, D, BrdColWrap>,
                 column_filter::caller<32, T, D, BrdColWrap>
+            },
+            {
+                0,
+                column_filter::caller< 1, T, D, BrdColReflect101>,
+                column_filter::caller< 2, T, D, BrdColReflect101>,
+                column_filter::caller< 3, T, D, BrdColReflect101>,
+                column_filter::caller< 4, T, D, BrdColReflect101>,
+                column_filter::caller< 5, T, D, BrdColReflect101>,
+                column_filter::caller< 6, T, D, BrdColReflect101>,
+                column_filter::caller< 7, T, D, BrdColReflect101>,
+                column_filter::caller< 8, T, D, BrdColReflect101>,
+                column_filter::caller< 9, T, D, BrdColReflect101>,
+                column_filter::caller<10, T, D, BrdColReflect101>,
+                column_filter::caller<11, T, D, BrdColReflect101>,
+                column_filter::caller<12, T, D, BrdColReflect101>,
+                column_filter::caller<13, T, D, BrdColReflect101>,
+                column_filter::caller<14, T, D, BrdColReflect101>,
+                column_filter::caller<15, T, D, BrdColReflect101>,
+                column_filter::caller<16, T, D, BrdColReflect101>,
+                column_filter::caller<17, T, D, BrdColReflect101>,
+                column_filter::caller<18, T, D, BrdColReflect101>,
+                column_filter::caller<19, T, D, BrdColReflect101>,
+                column_filter::caller<20, T, D, BrdColReflect101>,
+                column_filter::caller<21, T, D, BrdColReflect101>,
+                column_filter::caller<22, T, D, BrdColReflect101>,
+                column_filter::caller<23, T, D, BrdColReflect101>,
+                column_filter::caller<24, T, D, BrdColReflect101>,
+                column_filter::caller<25, T, D, BrdColReflect101>,
+                column_filter::caller<26, T, D, BrdColReflect101>,
+                column_filter::caller<27, T, D, BrdColReflect101>,
+                column_filter::caller<28, T, D, BrdColReflect101>,
+                column_filter::caller<29, T, D, BrdColReflect101>,
+                column_filter::caller<30, T, D, BrdColReflect101>,
+                column_filter::caller<31, T, D, BrdColReflect101>,
+                column_filter::caller<32, T, D, BrdColReflect101>
             }
         };
 
diff --git a/modules/gpufilters/src/cuda/filter2d.cu b/modules/gpufilters/src/cuda/filter2d.cu
index 0bb5fcd87..80c93c54e 100644
--- a/modules/gpufilters/src/cuda/filter2d.cu
+++ b/modules/gpufilters/src/cuda/filter2d.cu
@@ -131,11 +131,11 @@ namespace cv { namespace gpu { namespace cudev
             typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
             static const func_t funcs[] =
             {
-                Filter2DCaller<T, D, BrdReflect101>::call,
-                Filter2DCaller<T, D, BrdReplicate>::call,
                 Filter2DCaller<T, D, BrdConstant>::call,
+                Filter2DCaller<T, D, BrdReplicate>::call,
                 Filter2DCaller<T, D, BrdReflect>::call,
-                Filter2DCaller<T, D, BrdWrap>::call
+                Filter2DCaller<T, D, BrdWrap>::call,
+                Filter2DCaller<T, D, BrdReflect101>::call
             };
 
             if (stream == 0)
diff --git a/modules/gpufilters/src/cuda/row_filter.hpp b/modules/gpufilters/src/cuda/row_filter.hpp
index 787da3449..3199a02e6 100644
--- a/modules/gpufilters/src/cuda/row_filter.hpp
+++ b/modules/gpufilters/src/cuda/row_filter.hpp
@@ -186,38 +186,38 @@ namespace filter
         {
             {
                 0,
-                row_filter::caller< 1, T, D, BrdRowReflect101>,
-                row_filter::caller< 2, T, D, BrdRowReflect101>,
-                row_filter::caller< 3, T, D, BrdRowReflect101>,
-                row_filter::caller< 4, T, D, BrdRowReflect101>,
-                row_filter::caller< 5, T, D, BrdRowReflect101>,
-                row_filter::caller< 6, T, D, BrdRowReflect101>,
-                row_filter::caller< 7, T, D, BrdRowReflect101>,
-                row_filter::caller< 8, T, D, BrdRowReflect101>,
-                row_filter::caller< 9, T, D, BrdRowReflect101>,
-                row_filter::caller<10, T, D, BrdRowReflect101>,
-                row_filter::caller<11, T, D, BrdRowReflect101>,
-                row_filter::caller<12, T, D, BrdRowReflect101>,
-                row_filter::caller<13, T, D, BrdRowReflect101>,
-                row_filter::caller<14, T, D, BrdRowReflect101>,
-                row_filter::caller<15, T, D, BrdRowReflect101>,
-                row_filter::caller<16, T, D, BrdRowReflect101>,
-                row_filter::caller<17, T, D, BrdRowReflect101>,
-                row_filter::caller<18, T, D, BrdRowReflect101>,
-                row_filter::caller<19, T, D, BrdRowReflect101>,
-                row_filter::caller<20, T, D, BrdRowReflect101>,
-                row_filter::caller<21, T, D, BrdRowReflect101>,
-                row_filter::caller<22, T, D, BrdRowReflect101>,
-                row_filter::caller<23, T, D, BrdRowReflect101>,
-                row_filter::caller<24, T, D, BrdRowReflect101>,
-                row_filter::caller<25, T, D, BrdRowReflect101>,
-                row_filter::caller<26, T, D, BrdRowReflect101>,
-                row_filter::caller<27, T, D, BrdRowReflect101>,
-                row_filter::caller<28, T, D, BrdRowReflect101>,
-                row_filter::caller<29, T, D, BrdRowReflect101>,
-                row_filter::caller<30, T, D, BrdRowReflect101>,
-                row_filter::caller<31, T, D, BrdRowReflect101>,
-                row_filter::caller<32, T, D, BrdRowReflect101>
+                row_filter::caller< 1, T, D, BrdRowConstant>,
+                row_filter::caller< 2, T, D, BrdRowConstant>,
+                row_filter::caller< 3, T, D, BrdRowConstant>,
+                row_filter::caller< 4, T, D, BrdRowConstant>,
+                row_filter::caller< 5, T, D, BrdRowConstant>,
+                row_filter::caller< 6, T, D, BrdRowConstant>,
+                row_filter::caller< 7, T, D, BrdRowConstant>,
+                row_filter::caller< 8, T, D, BrdRowConstant>,
+                row_filter::caller< 9, T, D, BrdRowConstant>,
+                row_filter::caller<10, T, D, BrdRowConstant>,
+                row_filter::caller<11, T, D, BrdRowConstant>,
+                row_filter::caller<12, T, D, BrdRowConstant>,
+                row_filter::caller<13, T, D, BrdRowConstant>,
+                row_filter::caller<14, T, D, BrdRowConstant>,
+                row_filter::caller<15, T, D, BrdRowConstant>,
+                row_filter::caller<16, T, D, BrdRowConstant>,
+                row_filter::caller<17, T, D, BrdRowConstant>,
+                row_filter::caller<18, T, D, BrdRowConstant>,
+                row_filter::caller<19, T, D, BrdRowConstant>,
+                row_filter::caller<20, T, D, BrdRowConstant>,
+                row_filter::caller<21, T, D, BrdRowConstant>,
+                row_filter::caller<22, T, D, BrdRowConstant>,
+                row_filter::caller<23, T, D, BrdRowConstant>,
+                row_filter::caller<24, T, D, BrdRowConstant>,
+                row_filter::caller<25, T, D, BrdRowConstant>,
+                row_filter::caller<26, T, D, BrdRowConstant>,
+                row_filter::caller<27, T, D, BrdRowConstant>,
+                row_filter::caller<28, T, D, BrdRowConstant>,
+                row_filter::caller<29, T, D, BrdRowConstant>,
+                row_filter::caller<30, T, D, BrdRowConstant>,
+                row_filter::caller<31, T, D, BrdRowConstant>,
+                row_filter::caller<32, T, D, BrdRowConstant>
             },
             {
                 0,
@@ -254,41 +254,6 @@ namespace filter
                 row_filter::caller<31, T, D, BrdRowReplicate>,
                 row_filter::caller<32, T, D, BrdRowReplicate>
             },
-            {
-                0,
-                row_filter::caller< 1, T, D, BrdRowConstant>,
-                row_filter::caller< 2, T, D, BrdRowConstant>,
-                row_filter::caller< 3, T, D, BrdRowConstant>,
-                row_filter::caller< 4, T, D, BrdRowConstant>,
-                row_filter::caller< 5, T, D, BrdRowConstant>,
-                row_filter::caller< 6, T, D, BrdRowConstant>,
-                row_filter::caller< 7, T, D, BrdRowConstant>,
-                row_filter::caller< 8, T, D, BrdRowConstant>,
-                row_filter::caller< 9, T, D, BrdRowConstant>,
-                row_filter::caller<10, T, D, BrdRowConstant>,
-                row_filter::caller<11, T, D, BrdRowConstant>,
-                row_filter::caller<12, T, D, BrdRowConstant>,
-                row_filter::caller<13, T, D, BrdRowConstant>,
-                row_filter::caller<14, T, D, BrdRowConstant>,
-                row_filter::caller<15, T, D, BrdRowConstant>,
-                row_filter::caller<16, T, D, BrdRowConstant>,
-                row_filter::caller<17, T, D, BrdRowConstant>,
-                row_filter::caller<18, T, D, BrdRowConstant>,
-                row_filter::caller<19, T, D, BrdRowConstant>,
-                row_filter::caller<20, T, D, BrdRowConstant>,
-                row_filter::caller<21, T, D, BrdRowConstant>,
-                row_filter::caller<22, T, D, BrdRowConstant>,
-                row_filter::caller<23, T, D, BrdRowConstant>,
-                row_filter::caller<24, T, D, BrdRowConstant>,
-                row_filter::caller<25, T, D, BrdRowConstant>,
-                row_filter::caller<26, T, D, BrdRowConstant>,
-                row_filter::caller<27, T, D, BrdRowConstant>,
-                row_filter::caller<28, T, D, BrdRowConstant>,
-                row_filter::caller<29, T, D, BrdRowConstant>,
-                row_filter::caller<30, T, D, BrdRowConstant>,
-                row_filter::caller<31, T, D, BrdRowConstant>,
-                row_filter::caller<32, T, D, BrdRowConstant>
-            },
             {
                 0,
                 row_filter::caller< 1, T, D, BrdRowReflect>,
@@ -358,6 +323,41 @@ namespace filter
                 row_filter::caller<30, T, D, BrdRowWrap>,
                 row_filter::caller<31, T, D, BrdRowWrap>,
                 row_filter::caller<32, T, D, BrdRowWrap>
+            },
+            {
+                0,
+                row_filter::caller< 1, T, D, BrdRowReflect101>,
+                row_filter::caller< 2, T, D, BrdRowReflect101>,
+                row_filter::caller< 3, T, D, BrdRowReflect101>,
+                row_filter::caller< 4, T, D, BrdRowReflect101>,
+                row_filter::caller< 5, T, D, BrdRowReflect101>,
+                row_filter::caller< 6, T, D, BrdRowReflect101>,
+                row_filter::caller< 7, T, D, BrdRowReflect101>,
+                row_filter::caller< 8, T, D, BrdRowReflect101>,
+                row_filter::caller< 9, T, D, BrdRowReflect101>,
+                row_filter::caller<10, T, D, BrdRowReflect101>,
+                row_filter::caller<11, T, D, BrdRowReflect101>,
+                row_filter::caller<12, T, D, BrdRowReflect101>,
+                row_filter::caller<13, T, D, BrdRowReflect101>,
+                row_filter::caller<14, T, D, BrdRowReflect101>,
+                row_filter::caller<15, T, D, BrdRowReflect101>,
+                row_filter::caller<16, T, D, BrdRowReflect101>,
+                row_filter::caller<17, T, D, BrdRowReflect101>,
+                row_filter::caller<18, T, D, BrdRowReflect101>,
+                row_filter::caller<19, T, D, BrdRowReflect101>,
+                row_filter::caller<20, T, D, BrdRowReflect101>,
+                row_filter::caller<21, T, D, BrdRowReflect101>,
+                row_filter::caller<22, T, D, BrdRowReflect101>,
+                row_filter::caller<23, T, D, BrdRowReflect101>,
+                row_filter::caller<24, T, D, BrdRowReflect101>,
+                row_filter::caller<25, T, D, BrdRowReflect101>,
+                row_filter::caller<26, T, D, BrdRowReflect101>,
+                row_filter::caller<27, T, D, BrdRowReflect101>,
+                row_filter::caller<28, T, D, BrdRowReflect101>,
+                row_filter::caller<29, T, D, BrdRowReflect101>,
+                row_filter::caller<30, T, D, BrdRowReflect101>,
+                row_filter::caller<31, T, D, BrdRowReflect101>,
+                row_filter::caller<32, T, D, BrdRowReflect101>
             }
         };
 
diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp
index 6416325e1..8232ab804 100644
--- a/modules/gpufilters/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
@@ -783,9 +783,6 @@ Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const
 
     CV_Assert(ksize.width * ksize.height <= 16 * 16);
 
-    int gpuBorderType;
-    CV_Assert( tryConvertToGpuBorderType(brd_type, gpuBorderType) );
-
     GpuMat gpu_krnl;
     normalizeKernel(kernel, gpu_krnl, CV_32F);
 
@@ -815,7 +812,7 @@ Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const
         break;
     }
 
-    return Ptr<BaseFilter_GPU>(new GpuFilter2D(ksize, anchor, func, gpu_krnl, gpuBorderType));
+    return Ptr<BaseFilter_GPU>(new GpuFilter2D(ksize, anchor, func, gpu_krnl, brd_type));
 }
 
 Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int borderType)
@@ -936,9 +933,6 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
 
     CV_Assert( borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
 
-    int gpuBorderType;
-    CV_Assert( tryConvertToGpuBorderType(borderType, gpuBorderType) );
-
     const int sdepth = CV_MAT_DEPTH(srcType);
     const int cn = CV_MAT_CN(srcType);
     CV_Assert( sdepth <= CV_64F && cn <= 4 );
@@ -955,7 +949,7 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
 
     normalizeAnchor(anchor, ksize);
 
-    return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, gpu_row_krnl, func, gpuBorderType));
+    return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, gpu_row_krnl, func, borderType));
 }
 
 namespace
@@ -1041,9 +1035,6 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
 
     CV_Assert( borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
 
-    int gpuBorderType;
-    CV_Assert( tryConvertToGpuBorderType(borderType, gpuBorderType) );
-
     const int ddepth = CV_MAT_DEPTH(dstType);
     const int cn = CV_MAT_CN(dstType);
     CV_Assert( ddepth <= CV_64F && cn <= 4 );
@@ -1060,7 +1051,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
 
     normalizeAnchor(anchor, ksize);
 
-    return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, gpu_col_krnl, func, gpuBorderType));
+    return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, gpu_col_krnl, func, borderType));
 }
 
 Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel,
diff --git a/modules/gpuimgproc/src/bilateral_filter.cpp b/modules/gpuimgproc/src/bilateral_filter.cpp
index 0c14987d4..c95dbe4f5 100644
--- a/modules/gpuimgproc/src/bilateral_filter.cpp
+++ b/modules/gpuimgproc/src/bilateral_filter.cpp
@@ -89,11 +89,8 @@ void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, f
 
     CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
 
-    int gpuBorderType;
-    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
-
     dst.create(src.size(), src.type());
-    func(src, dst, kernel_size, sigma_spatial, sigma_color, gpuBorderType, StreamAccessor::getStream(s));
+    func(src, dst, kernel_size, sigma_spatial, sigma_color, borderMode, StreamAccessor::getStream(s));
 }
 
 #endif
diff --git a/modules/gpuimgproc/src/cuda/bilateral_filter.cu b/modules/gpuimgproc/src/cuda/bilateral_filter.cu
index 444927454..6aa5df27a 100644
--- a/modules/gpuimgproc/src/cuda/bilateral_filter.cu
+++ b/modules/gpuimgproc/src/cuda/bilateral_filter.cu
@@ -150,11 +150,11 @@ namespace cv { namespace gpu { namespace cudev
 
             static caller_t funcs[] =
             {
-                bilateral_caller<T, BrdReflect101>,
-                bilateral_caller<T, BrdReplicate>,
                 bilateral_caller<T, BrdConstant>,
+                bilateral_caller<T, BrdReplicate>,
                 bilateral_caller<T, BrdReflect>,
                 bilateral_caller<T, BrdWrap>,
+                bilateral_caller<T, BrdReflect101>
             };
             funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
         }
diff --git a/modules/gpuimgproc/src/cuda/imgproc.cu b/modules/gpuimgproc/src/cuda/imgproc.cu
index c47076f44..3f39a43eb 100644
--- a/modules/gpuimgproc/src/cuda/imgproc.cu
+++ b/modules/gpuimgproc/src/cuda/imgproc.cu
@@ -269,15 +269,15 @@ namespace cv { namespace gpu { namespace cudev
 
             switch (border_type)
             {
-            case BORDER_REFLECT101_GPU:
+            case BORDER_REFLECT101:
                 cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
                 break;
 
-            case BORDER_REFLECT_GPU:
+            case BORDER_REFLECT:
                 cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
                 break;
 
-            case BORDER_REPLICATE_GPU:
+            case BORDER_REPLICATE:
                 cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst);
                 break;
             }
@@ -381,15 +381,15 @@ namespace cv { namespace gpu { namespace cudev
 
             switch (border_type)
             {
-            case BORDER_REFLECT101_GPU:
+            case BORDER_REFLECT101:
                 cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
                 break;
 
-            case BORDER_REFLECT_GPU:
+            case BORDER_REFLECT:
                 cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
                 break;
 
-            case BORDER_REPLICATE_GPU:
+            case BORDER_REPLICATE:
                 cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst);
                 break;
             }
diff --git a/modules/gpuimgproc/src/imgproc.cpp b/modules/gpuimgproc/src/imgproc.cpp
index 939b14937..100d09186 100644
--- a/modules/gpuimgproc/src/imgproc.cpp
+++ b/modules/gpuimgproc/src/imgproc.cpp
@@ -552,14 +552,11 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& D
 
     CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
 
-    int gpuBorderType;
-    CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
-
     extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
 
     dst.create(src.size(), CV_32F);
 
-    cornerHarris_gpu(blockSize, static_cast<float>(k), Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
+    cornerHarris_gpu(blockSize, static_cast<float>(k), Dx, Dy, dst, borderType, StreamAccessor::getStream(stream));
 }
 
 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
@@ -580,14 +577,11 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM
 
     CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
 
-    int gpuBorderType;
-    CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
-
     extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
 
     dst.create(src.size(), CV_32F);
 
-    cornerMinEigenVal_gpu(blockSize, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
+    cornerMinEigenVal_gpu(blockSize, Dx, Dy, dst, borderType, StreamAccessor::getStream(stream));
 }
 
 
diff --git a/modules/gpuoptflow/src/cuda/optical_flow_farneback.cu b/modules/gpuoptflow/src/cuda/optical_flow_farneback.cu
index e7ff3a02f..68a58c16d 100644
--- a/modules/gpuoptflow/src/cuda/optical_flow_farneback.cu
+++ b/modules/gpuoptflow/src/cuda/optical_flow_farneback.cu
@@ -525,8 +525,11 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
 
         static const caller_t callers[] =
         {
-            gaussianBlurCaller<BrdReflect101<float> >,
+            0 /*gaussianBlurCaller<BrdConstant<float> >*/,
             gaussianBlurCaller<BrdReplicate<float> >,
+            0 /*gaussianBlurCaller<BrdReflect<float> >*/,
+            0 /*gaussianBlurCaller<BrdWrap<float> >*/,
+            gaussianBlurCaller<BrdReflect101<float> >
         };
 
         callers[borderMode](src, ksizeHalf, dst, stream);
@@ -620,8 +623,11 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
 
         static const caller_t callers[] =
         {
-            gaussianBlur5Caller<BrdReflect101<float>,256>,
+            0 /*gaussianBlur5Caller<BrdConstant<float>,256>*/,
             gaussianBlur5Caller<BrdReplicate<float>,256>,
+            0 /*gaussianBlur5Caller<BrdReflect<float>,256>*/,
+            0 /*gaussianBlur5Caller<BrdWrap<float>,256>*/,
+            gaussianBlur5Caller<BrdReflect101<float>,256>
         };
 
         callers[borderMode](src, ksizeHalf, dst, stream);
@@ -634,8 +640,11 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
 
         static const caller_t callers[] =
         {
-            gaussianBlur5Caller<BrdReflect101<float>,128>,
+            0 /*gaussianBlur5Caller<BrdConstant<float>,128>*/,
             gaussianBlur5Caller<BrdReplicate<float>,128>,
+            0 /*gaussianBlur5Caller<BrdReflect<float>,128>*/,
+            0 /*gaussianBlur5Caller<BrdWrap<float>,128>*/,
+            gaussianBlur5Caller<BrdReflect101<float>,128>
         };
 
         callers[borderMode](src, ksizeHalf, dst, stream);
diff --git a/modules/gpuoptflow/src/optical_flow_farneback.cpp b/modules/gpuoptflow/src/optical_flow_farneback.cpp
index 8dbf25b52..efe2436e6 100644
--- a/modules/gpuoptflow/src/optical_flow_farneback.cpp
+++ b/modules/gpuoptflow/src/optical_flow_farneback.cpp
@@ -192,10 +192,10 @@ void cv::gpu::FarnebackOpticalFlow::updateFlow_gaussianBlur(
 {
     if (deviceSupports(FEATURE_SET_COMPUTE_12))
         cudev::optflow_farneback::gaussianBlur5Gpu(
-                    M, blockSize/2, bufM, BORDER_REPLICATE_GPU, S(streams[0]));
+                    M, blockSize/2, bufM, BORDER_REPLICATE, S(streams[0]));
     else
         cudev::optflow_farneback::gaussianBlur5Gpu_CC11(
-                    M, blockSize/2, bufM, BORDER_REPLICATE_GPU, S(streams[0]));
+                    M, blockSize/2, bufM, BORDER_REPLICATE, S(streams[0]));
     swap(M, bufM);
 
     cudev::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0]));
@@ -366,7 +366,7 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
             for (int i = 0; i < 2; i++)
             {
                 cudev::optflow_farneback::gaussianBlurGpu(
-                        frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101_GPU, S(streams[i]));
+                        frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, S(streams[i]));
 #if ENABLE_GPU_RESIZE
                 resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR, streams[i]);
 #else
diff --git a/modules/gpuwarping/src/cuda/remap.cu b/modules/gpuwarping/src/cuda/remap.cu
index dd2c66915..c4ea317fb 100644
--- a/modules/gpuwarping/src/cuda/remap.cu
+++ b/modules/gpuwarping/src/cuda/remap.cu
@@ -212,25 +212,25 @@ namespace cv { namespace gpu { namespace cudev
             static const caller_t callers[3][5] =
             {
                 {
-                    RemapDispatcher<PointFilter, BrdReflect101, T>::call,
-                    RemapDispatcher<PointFilter, BrdReplicate, T>::call,
                     RemapDispatcher<PointFilter, BrdConstant, T>::call,
+                    RemapDispatcher<PointFilter, BrdReplicate, T>::call,
                     RemapDispatcher<PointFilter, BrdReflect, T>::call,
-                    RemapDispatcher<PointFilter, BrdWrap, T>::call
+                    RemapDispatcher<PointFilter, BrdWrap, T>::call,
+                    RemapDispatcher<PointFilter, BrdReflect101, T>::call
                 },
                 {
-                    RemapDispatcher<LinearFilter, BrdReflect101, T>::call,
-                    RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
                     RemapDispatcher<LinearFilter, BrdConstant, T>::call,
+                    RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
                     RemapDispatcher<LinearFilter, BrdReflect, T>::call,
-                    RemapDispatcher<LinearFilter, BrdWrap, T>::call
+                    RemapDispatcher<LinearFilter, BrdWrap, T>::call,
+                    RemapDispatcher<LinearFilter, BrdReflect101, T>::call
                 },
                 {
-                    RemapDispatcher<CubicFilter, BrdReflect101, T>::call,
-                    RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
                     RemapDispatcher<CubicFilter, BrdConstant, T>::call,
+                    RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
                     RemapDispatcher<CubicFilter, BrdReflect, T>::call,
-                    RemapDispatcher<CubicFilter, BrdWrap, T>::call
+                    RemapDispatcher<CubicFilter, BrdWrap, T>::call,
+                    RemapDispatcher<CubicFilter, BrdReflect101, T>::call
                 }
             };
 
diff --git a/modules/gpuwarping/src/cuda/warp.cu b/modules/gpuwarping/src/cuda/warp.cu
index 8c5a067d3..83db79ebf 100644
--- a/modules/gpuwarping/src/cuda/warp.cu
+++ b/modules/gpuwarping/src/cuda/warp.cu
@@ -281,25 +281,25 @@ namespace cv { namespace gpu { namespace cudev
             static const func_t funcs[3][5] =
             {
                 {
-                    WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call,
-                    WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
                     WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
+                    WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
                     WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
-                    WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call
+                    WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call,
+                    WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call
                 },
                 {
-                    WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
-                    WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
                     WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
+                    WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
                     WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
-                    WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call
+                    WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call,
+                    WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call
                 },
                 {
-                    WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call,
-                    WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
                     WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
+                    WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
                     WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
-                    WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call
+                    WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call,
+                    WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call
                 }
             };
 
diff --git a/modules/gpuwarping/src/remap.cpp b/modules/gpuwarping/src/remap.cpp
index 315766546..131f93763 100644
--- a/modules/gpuwarping/src/remap.cpp
+++ b/modules/gpuwarping/src/remap.cpp
@@ -83,9 +83,6 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
     const func_t func = funcs[src.depth()][src.channels() - 1];
     CV_Assert(func != 0);
 
-    int gpuBorderType;
-    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
-
     dst.create(xmap.size(), src.type());
 
     Scalar_<float> borderValueFloat;
@@ -96,7 +93,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
     src.locateROI(wholeSize, ofs);
 
     func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
-        dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
+        dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
 }
 
 #endif // HAVE_CUDA
diff --git a/modules/gpuwarping/src/warp.cpp b/modules/gpuwarping/src/warp.cpp
index e15c11b74..b3c44e562 100644
--- a/modules/gpuwarping/src/warp.cpp
+++ b/modules/gpuwarping/src/warp.cpp
@@ -289,9 +289,6 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
         const func_t func = funcs[src.depth()][src.channels() - 1];
         CV_Assert(func != 0);
 
-        int gpuBorderType;
-        CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
-
         float coeffs[2 * 3];
         Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
 
@@ -308,7 +305,7 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
         borderValueFloat = borderValue;
 
         func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
-            dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
+            dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
     }
 }
 
@@ -427,9 +424,6 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
         const func_t func = funcs[src.depth()][src.channels() - 1];
         CV_Assert(func != 0);
 
-        int gpuBorderType;
-        CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
-
         float coeffs[3 * 3];
         Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
 
@@ -446,7 +440,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
         borderValueFloat = borderValue;
 
         func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
-            dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
+            dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
     }
 }
 
diff --git a/modules/photo/src/cuda/nlm.cu b/modules/photo/src/cuda/nlm.cu
index 92bfccf37..03044697d 100644
--- a/modules/photo/src/cuda/nlm.cu
+++ b/modules/photo/src/cuda/nlm.cu
@@ -161,11 +161,11 @@ namespace cv { namespace gpu { namespace cudev
 
             static func_t funcs[] =
             {
-                nlm_caller<T, BrdReflect101>,
-                nlm_caller<T, BrdReplicate>,
                 nlm_caller<T, BrdConstant>,
+                nlm_caller<T, BrdReplicate>,
                 nlm_caller<T, BrdReflect>,
                 nlm_caller<T, BrdWrap>,
+                nlm_caller<T, BrdReflect101>
             };
             funcs[borderMode](src, dst, search_radius, block_radius, h, stream);
         }
diff --git a/modules/photo/src/denoising_gpu.cpp b/modules/photo/src/denoising_gpu.cpp
index 21647315c..65d6f8121 100644
--- a/modules/photo/src/denoising_gpu.cpp
+++ b/modules/photo/src/denoising_gpu.cpp
@@ -85,11 +85,8 @@ void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_
     int b = borderMode;
     CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP);
 
-    int gpuBorderType;
-    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
-
     dst.create(src.size(), src.type());
-    func(src, dst, search_window/2, block_window/2, h, gpuBorderType, StreamAccessor::getStream(s));
+    func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(s));
 }
 
 namespace cv { namespace gpu { namespace cudev

From b4f3d0872578f17e877994a56e357f3c00413a72 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:46:09 +0400
Subject: [PATCH 34/49] gpuarithm module fixes

---
 modules/gpuarithm/CMakeLists.txt              |    2 +-
 modules/gpuarithm/doc/arithm.rst              |  211 +
 modules/gpuarithm/doc/core.rst                |  128 +
 ..._operations.rst => element_operations.rst} |  109 +-
 modules/gpuarithm/doc/gpuarithm.rst           |   13 +-
 .../gpuarithm/doc/operations_on_matrices.rst  |  437 --
 .../{matrix_reductions.rst => reductions.rst} |   86 +-
 .../gpuarithm/include/opencv2/gpuarithm.hpp   |   16 +-
 modules/gpuarithm/perf/perf_arithm.cpp        |  306 ++
 modules/gpuarithm/perf/perf_core.cpp          | 2126 ----------
 .../perf/perf_element_operations.cpp          | 1497 +++++++
 modules/gpuarithm/perf/perf_reductions.cpp    |  466 +++
 modules/gpuarithm/src/arithm.cpp              |  480 +--
 modules/gpuarithm/src/core.cpp                |  488 +++
 modules/gpuarithm/src/element_operations.cpp  |  149 +-
 modules/gpuarithm/src/precomp.hpp             |    1 -
 .../{matrix_reductions.cpp => reductions.cpp} |  138 +-
 modules/gpuarithm/src/split_merge.cpp         |  171 -
 modules/gpuarithm/test/test_arithm.cpp        |  439 ++
 modules/gpuarithm/test/test_core.cpp          | 3612 -----------------
 .../test/test_element_operations.cpp          | 2503 ++++++++++++
 modules/gpuarithm/test/test_reductions.cpp    |  819 ++++
 22 files changed, 7297 insertions(+), 6900 deletions(-)
 create mode 100644 modules/gpuarithm/doc/arithm.rst
 create mode 100644 modules/gpuarithm/doc/core.rst
 rename modules/gpuarithm/doc/{per_element_operations.rst => element_operations.rst} (81%)
 delete mode 100644 modules/gpuarithm/doc/operations_on_matrices.rst
 rename modules/gpuarithm/doc/{matrix_reductions.rst => reductions.rst} (80%)
 create mode 100644 modules/gpuarithm/perf/perf_arithm.cpp
 create mode 100644 modules/gpuarithm/perf/perf_element_operations.cpp
 create mode 100644 modules/gpuarithm/perf/perf_reductions.cpp
 create mode 100644 modules/gpuarithm/src/core.cpp
 rename modules/gpuarithm/src/{matrix_reductions.cpp => reductions.cpp} (94%)
 delete mode 100644 modules/gpuarithm/src/split_merge.cpp
 create mode 100644 modules/gpuarithm/test/test_arithm.cpp
 create mode 100644 modules/gpuarithm/test/test_element_operations.cpp
 create mode 100644 modules/gpuarithm/test/test_reductions.cpp

diff --git a/modules/gpuarithm/CMakeLists.txt b/modules/gpuarithm/CMakeLists.txt
index 4b09dc182..7cd9a458b 100644
--- a/modules/gpuarithm/CMakeLists.txt
+++ b/modules/gpuarithm/CMakeLists.txt
@@ -4,7 +4,7 @@ endif()
 
 set(the_description "GPU-accelerated Operations on Matrices")
 
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)
 
 ocv_add_module(gpuarithm opencv_core OPTIONAL opencv_gpulegacy)
 
diff --git a/modules/gpuarithm/doc/arithm.rst b/modules/gpuarithm/doc/arithm.rst
new file mode 100644
index 000000000..8a051bc49
--- /dev/null
+++ b/modules/gpuarithm/doc/arithm.rst
@@ -0,0 +1,211 @@
+Arithm Operations on Matrices
+=============================
+
+.. highlight:: cpp
+
+
+
+gpu::gemm
+------------------
+Performs generalized matrix multiplication.
+
+.. ocv:function:: void gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null())
+
+    :param src1: First multiplied input matrix that should have  ``CV_32FC1`` , ``CV_64FC1`` , ``CV_32FC2`` , or  ``CV_64FC2``  type.
+
+    :param src2: Second multiplied input matrix of the same type as  ``src1`` .
+
+    :param alpha: Weight of the matrix product.
+
+    :param src3: Third optional delta matrix added to the matrix product. It should have the same type as  ``src1``  and  ``src2`` .
+
+    :param beta: Weight of  ``src3`` .
+
+    :param dst: Destination matrix. It has the proper size and the same type as input matrices.
+
+    :param flags: Operation flags:
+
+            * **GEMM_1_T** transpose  ``src1``
+            * **GEMM_2_T** transpose  ``src2``
+            * **GEMM_3_T** transpose  ``src3``
+
+    :param stream: Stream for the asynchronous version.
+
+The function performs generalized matrix multiplication similar to the ``gemm`` functions in BLAS level 3. For example, ``gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)`` corresponds to
+
+.. math::
+
+    \texttt{dst} =  \texttt{alpha} \cdot \texttt{src1} ^T  \cdot \texttt{src2} +  \texttt{beta} \cdot \texttt{src3} ^T
+
+.. note:: Transposition operation doesn't support  ``CV_64FC2``  input type.
+
+.. seealso:: :ocv:func:`gemm`
+
+
+
+gpu::mulSpectrums
+---------------------
+Performs a per-element multiplication of two Fourier spectrums.
+
+.. ocv:function:: void gpu::mulSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream=Stream::Null() )
+
+    :param a: First spectrum.
+
+    :param b: Second spectrum with the same size and type as  ``a`` .
+
+    :param c: Destination spectrum.
+
+    :param flags: Mock parameter kept for similarity between the CPU and GPU interfaces.
+
+    :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
+
+    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
+
+.. seealso:: :ocv:func:`mulSpectrums`
+
+
+
+gpu::mulAndScaleSpectrums
+-----------------------------
+Performs a per-element multiplication of two Fourier spectrums and scales the result.
+
+.. ocv:function:: void gpu::mulAndScaleSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream=Stream::Null() )
+
+    :param a: First spectrum.
+
+    :param b: Second spectrum with the same size and type as  ``a`` .
+
+    :param c: Destination spectrum.
+
+    :param flags: Mock parameter kept for similarity between the CPU and GPU interfaces.
+
+    :param scale: Scale constant.
+
+    :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
+
+    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
+
+.. seealso:: :ocv:func:`mulSpectrums`
+
+
+
+gpu::dft
+------------
+Performs a forward or inverse discrete Fourier transform (1D or 2D) of a floating-point matrix.
+
+.. ocv:function:: void gpu::dft( const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream=Stream::Null() )
+
+    :param src: Source matrix (real or complex).
+
+    :param dst: Destination matrix (real or complex).
+
+    :param dft_size: Size of a discrete Fourier transform.
+
+    :param flags: Optional flags:
+
+        * **DFT_ROWS** transforms each individual row of the source matrix.
+
+        * **DFT_SCALE** scales the result: divide it by the number of elements in the transform (obtained from  ``dft_size`` ).
+
+        * **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively).
+
+        * **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of real-complex transform, so the destination matrix must be real.
+
+Use this function to handle real matrices ( ``CV_32FC1`` ) and complex matrices in the interleaved format ( ``CV_32FC2`` ).
+
+The source matrix should be continuous, otherwise reallocation and data copying is performed. The function chooses an operation mode depending on the flags, size, and channel count of the source matrix:
+
+    * If the source matrix is complex and the output is not specified as real, the destination matrix is complex and has the ``dft_size``    size and ``CV_32FC2``    type. The destination matrix contains a full result of the DFT (forward or inverse).
+
+    * If the source matrix is complex and the output is specified as real, the function assumes that its input is the result of the forward transform (see the next item). The destination matrix has the ``dft_size`` size and ``CV_32FC1`` type. It contains the result of the inverse DFT.
+
+    * If the source matrix is real (its type is ``CV_32FC1`` ), forward DFT is performed. The result of the DFT is packed into complex ( ``CV_32FC2`` ) matrix. So, the width of the destination matrix is ``dft_size.width / 2 + 1`` . But if the source is a single column, the height is reduced instead of the width.
+
+.. seealso:: :ocv:func:`dft`
+
+
+
+gpu::ConvolveBuf
+----------------
+.. ocv:struct:: gpu::ConvolveBuf
+
+Class providing a memory buffer for the :ocv:func:`gpu::convolve` function. It also allows adjusting some specific parameters. ::
+
+    struct CV_EXPORTS ConvolveBuf
+    {
+        Size result_size;
+        Size block_size;
+        Size user_block_size;
+        Size dft_size;
+        int spect_len;
+
+        GpuMat image_spect, templ_spect, result_spect;
+        GpuMat image_block, templ_block, result_data;
+
+        void create(Size image_size, Size templ_size);
+        static Size estimateBlockSize(Size result_size, Size templ_size);
+    };
+
+You can use the ``user_block_size`` field to set a specific block size for the :ocv:func:`gpu::convolve` function. If you leave its default value ``Size(0,0)``, automatic estimation of the block size is used (which is optimized for speed). By varying ``user_block_size`` you can reduce memory requirements at the cost of speed.
+
+
+
+gpu::ConvolveBuf::create
+------------------------
+.. ocv:function:: void gpu::ConvolveBuf::create(Size image_size, Size templ_size)
+
+Constructs a buffer for :ocv:func:`gpu::convolve` function with respective arguments.
+
+
+
+gpu::convolve
+-----------------
+Computes a convolution (or cross-correlation) of two images.
+
+.. ocv:function:: void gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr=false)
+
+.. ocv:function:: void gpu::convolve( const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream=Stream::Null() )
+
+    :param image: Source image. Only  ``CV_32FC1`` images are supported for now.
+
+    :param templ: Template image. The size is not greater than the  ``image`` size. The type is the same as  ``image`` .
+
+    :param result: Result image. If  ``image`` is  *W x H*  and ``templ`` is  *w x h*, then  ``result`` must be *W-w+1 x H-h+1*.
+
+    :param ccorr: Flag to evaluate cross-correlation instead of convolution.
+
+    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::ConvolveBuf`.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`gpu::filter2D`
+
+
+
+gpu::integral
+-----------------
+Computes an integral image.
+
+.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null())
+
+    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
+
+    :param sum: Integral image containing 32-bit unsigned integer values packed into  ``CV_32SC1`` .
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`integral`
+
+
+
+gpu::sqrIntegral
+--------------------
+Computes a squared integral image.
+
+.. ocv:function:: void gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null())
+
+    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
+
+    :param sqsum: Squared integral image containing 64-bit unsigned integer values packed into  ``CV_64FC1`` .
+
+    :param stream: Stream for the asynchronous version.
diff --git a/modules/gpuarithm/doc/core.rst b/modules/gpuarithm/doc/core.rst
new file mode 100644
index 000000000..50599bcf2
--- /dev/null
+++ b/modules/gpuarithm/doc/core.rst
@@ -0,0 +1,128 @@
+Core Operations on Matrices
+===========================
+
+.. highlight:: cpp
+
+
+
+gpu::merge
+--------------
+Makes a multi-channel matrix out of several single-channel matrices.
+
+.. ocv:function:: void gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null())
+
+    :param src: Array/vector of source matrices.
+
+    :param n: Number of source matrices.
+
+    :param dst: Destination matrix.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`merge`
+
+
+
+gpu::split
+--------------
+Copies each plane of a multi-channel matrix into an array.
+
+.. ocv:function:: void gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.
+
+    :param dst: Destination array/vector of single-channel matrices.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`split`
+
+
+
+gpu::copyMakeBorder
+-----------------------
+Forms a border around an image.
+
+.. ocv:function:: void gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null())
+
+    :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and  ``CV_32FC1`` types are supported.
+
+    :param dst: Destination image with the same type as  ``src``. The size is  ``Size(src.cols+left+right, src.rows+top+bottom)`` .
+
+    :param top:
+
+    :param bottom:
+
+    :param left:
+
+    :param right: Number of pixels in each direction from the source image rectangle to extrapolate. For example:  ``top=1, bottom=1, left=1, right=1`` mean that 1 pixel-wide border needs to be built.
+
+    :param borderType: Border type. See  :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
+
+    :param value: Border value.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`copyMakeBorder`
+
+
+
+gpu::transpose
+------------------
+Transposes a matrix.
+
+.. ocv:function:: void gpu::transpose( const GpuMat& src1, GpuMat& dst, Stream& stream=Stream::Null() )
+
+    :param src1: Source matrix. 1-, 4-, 8-byte element sizes are supported for now (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc).
+
+    :param dst: Destination matrix.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`transpose`
+
+
+
+gpu::flip
+-------------
+Flips a 2D matrix around vertical, horizontal, or both axes.
+
+.. ocv:function:: void gpu::flip( const GpuMat& a, GpuMat& b, int flipCode, Stream& stream=Stream::Null() )
+
+    :param a: Source matrix. Supports 1-, 3- and 4-channel images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth.
+
+    :param b: Destination matrix.
+
+    :param flipCode: Flip mode for the source:
+
+        * ``0`` Flips around x-axis.
+
+        * ``>0`` Flips around y-axis.
+
+        * ``<0`` Flips around both axes.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`flip`
+
+
+
+gpu::LUT
+------------
+Transforms the source matrix into the destination matrix using the given look-up table: ``dst(I) = lut(src(I))``
+
+.. ocv:function:: void gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.  ``CV_8UC1``  and  ``CV_8UC3``  matrices are supported for now.
+
+    :param lut: Look-up table of 256 elements. It is a continuous ``CV_8U`` matrix.
+
+    :param dst: Destination matrix with the same depth as  ``lut``  and the same number of channels as  ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`LUT`
diff --git a/modules/gpuarithm/doc/per_element_operations.rst b/modules/gpuarithm/doc/element_operations.rst
similarity index 81%
rename from modules/gpuarithm/doc/per_element_operations.rst
rename to modules/gpuarithm/doc/element_operations.rst
index bf393a24f..eae2ad7a2 100644
--- a/modules/gpuarithm/doc/per_element_operations.rst
+++ b/modules/gpuarithm/doc/element_operations.rst
@@ -1,5 +1,5 @@
 Per-element Operations
-=======================
+======================
 
 .. highlight:: cpp
 
@@ -112,6 +112,7 @@ This function, in contrast to :ocv:func:`divide`, uses a round-down rounding mod
 .. seealso:: :ocv:func:`divide`
 
 
+
 gpu::addWeighted
 ----------------
 Computes the weighted sum of two arrays.
@@ -465,3 +466,109 @@ Applies a fixed-level threshold to each array element.
     :param stream: Stream for the asynchronous version.
 
 .. seealso:: :ocv:func:`threshold`
+
+
+
+gpu::magnitude
+------------------
+Computes magnitudes of complex matrix elements.
+
+.. ocv:function:: void gpu::magnitude( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
+
+.. ocv:function:: void gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
+
+    :param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
+
+    :param x: Source matrix containing real components ( ``CV_32FC1`` ).
+
+    :param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
+
+    :param magnitude: Destination matrix of float magnitudes ( ``CV_32FC1`` ).
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`magnitude`
+
+
+
+gpu::magnitudeSqr
+---------------------
+Computes squared magnitudes of complex matrix elements.
+
+.. ocv:function:: void gpu::magnitudeSqr( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
+
+.. ocv:function:: void gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
+
+    :param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
+
+    :param x: Source matrix containing real components ( ``CV_32FC1`` ).
+
+    :param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
+
+    :param magnitude: Destination matrix of float squared magnitudes ( ``CV_32FC1`` ).
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::phase
+--------------
+Computes polar angles of complex matrix elements.
+
+.. ocv:function:: void gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
+
+    :param x: Source matrix containing real components ( ``CV_32FC1`` ).
+
+    :param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
+
+    :param angle: Destination matrix of angles ( ``CV_32FC1`` ).
+
+    :param angleInDegrees: Flag indicating that the angles must be computed in degrees.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`phase`
+
+
+
+gpu::cartToPolar
+--------------------
+Converts Cartesian coordinates into polar.
+
+.. ocv:function:: void gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
+
+    :param x: Source matrix containing real components ( ``CV_32FC1`` ).
+
+    :param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
+
+    :param magnitude: Destination matrix of float magnitudes ( ``CV_32FC1`` ).
+
+    :param angle: Destination matrix of angles ( ``CV_32FC1`` ).
+
+    :param angleInDegrees: Flag indicating that the angles must be computed in degrees.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`cartToPolar`
+
+
+
+gpu::polarToCart
+--------------------
+Converts polar coordinates into Cartesian.
+
+.. ocv:function:: void gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees=false, Stream& stream = Stream::Null())
+
+    :param magnitude: Source matrix containing magnitudes ( ``CV_32FC1`` ).
+
+    :param angle: Source matrix containing angles ( ``CV_32FC1`` ).
+
+    :param x: Destination matrix of real components ( ``CV_32FC1`` ).
+
+    :param y: Destination matrix of imaginary components ( ``CV_32FC1`` ).
+
+    :param angleInDegrees: Flag indicating that the input angles are given in degrees.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`polarToCart`
diff --git a/modules/gpuarithm/doc/gpuarithm.rst b/modules/gpuarithm/doc/gpuarithm.rst
index a8b0f1445..92a31ea43 100644
--- a/modules/gpuarithm/doc/gpuarithm.rst
+++ b/modules/gpuarithm/doc/gpuarithm.rst
@@ -1,10 +1,11 @@
-*******************************************
-gpu. GPU-accelerated Operations on Matrices
-*******************************************
+*************************************************
+gpuarithm. GPU-accelerated Operations on Matrices
+*************************************************
 
 .. toctree::
     :maxdepth: 1
 
-    operations_on_matrices
-    per_element_operations
-    matrix_reductions
+    core
+    element_operations
+    reductions
+    arithm
diff --git a/modules/gpuarithm/doc/operations_on_matrices.rst b/modules/gpuarithm/doc/operations_on_matrices.rst
deleted file mode 100644
index a25100728..000000000
--- a/modules/gpuarithm/doc/operations_on_matrices.rst
+++ /dev/null
@@ -1,437 +0,0 @@
-Operations on Matrices
-======================
-
-.. highlight:: cpp
-
-
-
-gpu::gemm
-------------------
-Performs generalized matrix multiplication.
-
-.. ocv:function:: void gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null())
-
-    :param src1: First multiplied input matrix that should have  ``CV_32FC1`` , ``CV_64FC1`` , ``CV_32FC2`` , or  ``CV_64FC2``  type.
-
-    :param src2: Second multiplied input matrix of the same type as  ``src1`` .
-
-    :param alpha: Weight of the matrix product.
-
-    :param src3: Third optional delta matrix added to the matrix product. It should have the same type as  ``src1``  and  ``src2`` .
-
-    :param beta: Weight of  ``src3`` .
-
-    :param dst: Destination matrix. It has the proper size and the same type as input matrices.
-
-    :param flags: Operation flags:
-
-            * **GEMM_1_T** transpose  ``src1``
-            * **GEMM_2_T** transpose  ``src2``
-            * **GEMM_3_T** transpose  ``src3``
-
-    :param stream: Stream for the asynchronous version.
-
-The function performs generalized matrix multiplication similar to the ``gemm`` functions in BLAS level 3. For example, ``gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)`` corresponds to
-
-.. math::
-
-    \texttt{dst} =  \texttt{alpha} \cdot \texttt{src1} ^T  \cdot \texttt{src2} +  \texttt{beta} \cdot \texttt{src3} ^T
-
-.. note:: Transposition operation doesn't support  ``CV_64FC2``  input type.
-
-.. seealso:: :ocv:func:`gemm`
-
-
-
-gpu::transpose
-------------------
-Transposes a matrix.
-
-.. ocv:function:: void gpu::transpose( const GpuMat& src1, GpuMat& dst, Stream& stream=Stream::Null() )
-
-    :param src1: Source matrix. 1-, 4-, 8-byte element sizes are supported for now (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc).
-
-    :param dst: Destination matrix.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`transpose`
-
-
-
-gpu::flip
--------------
-Flips a 2D matrix around vertical, horizontal, or both axes.
-
-.. ocv:function:: void gpu::flip( const GpuMat& a, GpuMat& b, int flipCode, Stream& stream=Stream::Null() )
-
-    :param a: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth.
-
-    :param b: Destination matrix.
-
-    :param flipCode: Flip mode for the source:
-
-        * ``0`` Flips around x-axis.
-
-        * ``>0`` Flips around y-axis.
-
-        * ``<0`` Flips around both axes.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`flip`
-
-
-
-gpu::LUT
-------------
-Transforms the source matrix into the destination matrix using the given look-up table: ``dst(I) = lut(src(I))``
-
-.. ocv:function:: void gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Source matrix.  ``CV_8UC1``  and  ``CV_8UC3``  matrices are supported for now.
-
-    :param lut: Look-up table of 256 elements. It is a continuous ``CV_8U`` matrix.
-
-    :param dst: Destination matrix with the same depth as  ``lut``  and the same number of channels as  ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`LUT`
-
-
-
-gpu::merge
---------------
-Makes a multi-channel matrix out of several single-channel matrices.
-
-.. ocv:function:: void gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Array/vector of source matrices.
-
-    :param n: Number of source matrices.
-
-    :param dst: Destination matrix.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`merge`
-
-
-
-gpu::split
---------------
-Copies each plane of a multi-channel matrix into an array.
-
-.. ocv:function:: void gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream = Stream::Null())
-
-    :param src: Source matrix.
-
-    :param dst: Destination array/vector of single-channel matrices.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`split`
-
-
-
-gpu::magnitude
-------------------
-Computes magnitudes of complex matrix elements.
-
-.. ocv:function:: void gpu::magnitude( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
-
-.. ocv:function:: void gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
-
-    :param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
-
-    :param x: Source matrix containing real components ( ``CV_32FC1`` ).
-
-    :param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
-
-    :param magnitude: Destination matrix of float magnitudes ( ``CV_32FC1`` ).
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`magnitude`
-
-
-
-gpu::magnitudeSqr
----------------------
-Computes squared magnitudes of complex matrix elements.
-
-.. ocv:function:: void gpu::magnitudeSqr( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
-
-.. ocv:function:: void gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
-
-    :param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
-
-    :param x: Source matrix containing real components ( ``CV_32FC1`` ).
-
-    :param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
-
-    :param magnitude: Destination matrix of float magnitude squares ( ``CV_32FC1`` ).
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::phase
---------------
-Computes polar angles of complex matrix elements.
-
-.. ocv:function:: void gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
-
-    :param x: Source matrix containing real components ( ``CV_32FC1`` ).
-
-    :param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
-
-    :param angle: Destination matrix of angles ( ``CV_32FC1`` ).
-
-    :param angleInDegrees: Flag for angles that must be evaluated in degrees.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`phase`
-
-
-
-gpu::cartToPolar
---------------------
-Converts Cartesian coordinates into polar.
-
-.. ocv:function:: void gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
-
-    :param x: Source matrix containing real components ( ``CV_32FC1`` ).
-
-    :param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
-
-    :param magnitude: Destination matrix of float magnitudes ( ``CV_32FC1`` ).
-
-    :param angle: Destination matrix of angles ( ``CV_32FC1`` ).
-
-    :param angleInDegrees: Flag for angles that must be evaluated in degrees.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`cartToPolar`
-
-
-
-gpu::polarToCart
---------------------
-Converts polar coordinates into Cartesian.
-
-.. ocv:function:: void gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees=false, Stream& stream = Stream::Null())
-
-    :param magnitude: Source matrix containing magnitudes ( ``CV_32FC1`` ).
-
-    :param angle: Source matrix containing angles ( ``CV_32FC1`` ).
-
-    :param x: Destination matrix of real components ( ``CV_32FC1`` ).
-
-    :param y: Destination matrix of imaginary components ( ``CV_32FC1`` ).
-
-    :param angleInDegrees: Flag that indicates angles in degrees.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`polarToCart`
-
-
-
-gpu::normalize
---------------
-Normalizes the norm or value range of an array.
-
-.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat())
-
-.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
-
-    :param src: input array.
-
-    :param dst: output array of the same size as  ``src`` .
-
-    :param alpha: norm value to normalize to or the lower range boundary in case of the range normalization.
-
-    :param beta: upper range boundary in case of the range normalization; it is not used for the norm normalization.
-
-    :param normType: normalization type (see the details below).
-
-    :param dtype: when negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as  ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``.
-
-    :param mask: optional operation mask.
-
-    :param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
-
-    :param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
-
-.. seealso:: :ocv:func:`normalize`
-
-
-
-gpu::mulSpectrums
----------------------
-Performs a per-element multiplication of two Fourier spectrums.
-
-.. ocv:function:: void gpu::mulSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream=Stream::Null() )
-
-    :param a: First spectrum.
-
-    :param b: Second spectrum with the same size and type as  ``a`` .
-
-    :param c: Destination spectrum.
-
-    :param flags: Mock parameter used for CPU/GPU interfaces similarity.
-
-    :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
-
-    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
-
-.. seealso:: :ocv:func:`mulSpectrums`
-
-
-
-gpu::mulAndScaleSpectrums
------------------------------
-Performs a per-element multiplication of two Fourier spectrums and scales the result.
-
-.. ocv:function:: void gpu::mulAndScaleSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream=Stream::Null() )
-
-    :param a: First spectrum.
-
-    :param b: Second spectrum with the same size and type as  ``a`` .
-
-    :param c: Destination spectrum.
-
-    :param flags: Mock parameter used for CPU/GPU interfaces similarity.
-
-    :param scale: Scale constant.
-
-    :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
-
-    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
-
-.. seealso:: :ocv:func:`mulSpectrums`
-
-
-
-gpu::dft
-------------
-Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix.
-
-.. ocv:function:: void gpu::dft( const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream=Stream::Null() )
-
-    :param src: Source matrix (real or complex).
-
-    :param dst: Destination matrix (real or complex).
-
-    :param dft_size: Size of a discrete Fourier transform.
-
-    :param flags: Optional flags:
-
-        * **DFT_ROWS** transforms each individual row of the source matrix.
-
-        * **DFT_SCALE** scales the result: divide it by the number of elements in the transform (obtained from  ``dft_size`` ).
-
-        * **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively).
-
-        * **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of real-complex transform, so the destination matrix must be real.
-
-Use to handle real matrices ( ``CV32FC1`` ) and complex matrices in the interleaved format ( ``CV32FC2`` ).
-
-The source matrix should be continuous, otherwise reallocation and data copying is performed. The function chooses an operation mode depending on the flags, size, and channel count of the source matrix:
-
-    * If the source matrix is complex and the output is not specified as real, the destination matrix is complex and has the ``dft_size``    size and ``CV_32FC2``    type. The destination matrix contains a full result of the DFT (forward or inverse).
-
-    * If the source matrix is complex and the output is specified as real, the function assumes that its input is the result of the forward transform (see the next item). The destination matrix has the ``dft_size`` size and ``CV_32FC1`` type. It contains the result of the inverse DFT.
-
-    * If the source matrix is real (its type is ``CV_32FC1`` ), forward DFT is performed. The result of the DFT is packed into complex ( ``CV_32FC2`` ) matrix. So, the width of the destination matrix is ``dft_size.width / 2 + 1`` . But if the source is a single column, the height is reduced instead of the width.
-
-.. seealso:: :ocv:func:`dft`
-
-
-gpu::ConvolveBuf
-----------------
-.. ocv:struct:: gpu::ConvolveBuf
-
-Class providing a memory buffer for :ocv:func:`gpu::convolve` function, plus it allows to adjust some specific parameters. ::
-
-    struct CV_EXPORTS ConvolveBuf
-    {
-        Size result_size;
-        Size block_size;
-        Size user_block_size;
-        Size dft_size;
-        int spect_len;
-
-        GpuMat image_spect, templ_spect, result_spect;
-        GpuMat image_block, templ_block, result_data;
-
-        void create(Size image_size, Size templ_size);
-        static Size estimateBlockSize(Size result_size, Size templ_size);
-    };
-
-You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::convolve` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
-
-gpu::ConvolveBuf::create
-------------------------
-.. ocv:function:: gpu::ConvolveBuf::create(Size image_size, Size templ_size)
-
-Constructs a buffer for :ocv:func:`gpu::convolve` function with respective arguments.
-
-
-gpu::convolve
------------------
-Computes a convolution (or cross-correlation) of two images.
-
-.. ocv:function:: void gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr=false)
-
-.. ocv:function:: void gpu::convolve( const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream=Stream::Null() )
-
-    :param image: Source image. Only  ``CV_32FC1`` images are supported for now.
-
-    :param templ: Template image. The size is not greater than the  ``image`` size. The type is the same as  ``image`` .
-
-    :param result: Result image. If  ``image`` is  *W x H*  and ``templ`` is  *w x h*, then  ``result`` must be *W-w+1 x H-h+1*.
-
-    :param ccorr: Flags to evaluate cross-correlation instead of convolution.
-
-    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::ConvolveBuf`.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`gpu::filter2D`
-
-
-
-gpu::copyMakeBorder
------------------------
-Forms a border around an image.
-
-.. ocv:function:: void gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null())
-
-    :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and  ``CV_32FC1`` types are supported.
-
-    :param dst: Destination image with the same type as  ``src``. The size is  ``Size(src.cols+left+right, src.rows+top+bottom)`` .
-
-    :param top:
-
-    :param bottom:
-
-    :param left:
-
-    :param right: Number of pixels in each direction from the source image rectangle to extrapolate. For example:  ``top=1, bottom=1, left=1, right=1`` mean that 1 pixel-wide border needs to be built.
-
-    :param borderType: Border type. See  :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
-
-    :param value: Border value.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`copyMakeBorder`
diff --git a/modules/gpuarithm/doc/matrix_reductions.rst b/modules/gpuarithm/doc/reductions.rst
similarity index 80%
rename from modules/gpuarithm/doc/matrix_reductions.rst
rename to modules/gpuarithm/doc/reductions.rst
index e9229f8a8..938efc35b 100644
--- a/modules/gpuarithm/doc/matrix_reductions.rst
+++ b/modules/gpuarithm/doc/reductions.rst
@@ -5,25 +5,6 @@ Matrix Reductions
 
 
 
-gpu::meanStdDev
--------------------
-Computes a mean value and a standard deviation of matrix elements.
-
-.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev)
-.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf)
-
-    :param mtx: Source matrix.  ``CV_8UC1``  matrices are supported for now.
-
-    :param mean: Mean value.
-
-    :param stddev: Standard deviation value.
-
-    :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
-
-.. seealso:: :ocv:func:`meanStdDev`
-
-
-
 gpu::norm
 -------------
 Returns the norm of a matrix (or difference of two matrices).
@@ -205,3 +186,70 @@ Reduces a matrix to a vector.
 The function ``reduce`` reduces the matrix to a vector by treating the matrix rows/columns as a set of 1D vectors and performing the specified operation on the vectors until a single row/column is obtained. For example, the function can be used to compute horizontal and vertical projections of a raster image. In case of ``CV_REDUCE_SUM`` and ``CV_REDUCE_AVG`` , the output may have a larger element bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction modes.
 
 .. seealso:: :ocv:func:`reduce`
+
+
+
+gpu::normalize
+--------------
+Normalizes the norm or value range of an array.
+
+.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat())
+
+.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
+
+    :param src: input array.
+
+    :param dst: output array of the same size as  ``src`` .
+
+    :param alpha: norm value to normalize to or the lower range boundary in case of the range normalization.
+
+    :param beta: upper range boundary in case of the range normalization; it is not used for the norm normalization.
+
+    :param norm_type: normalization type (see :ocv:func:`normalize` for details).
+
+    :param dtype: when negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as  ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``.
+
+    :param mask: optional operation mask.
+
+    :param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+    :param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+.. seealso:: :ocv:func:`normalize`
+
+
+
+gpu::meanStdDev
+-------------------
+Computes a mean value and a standard deviation of matrix elements.
+
+.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev)
+.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf)
+
+    :param mtx: Source matrix.  ``CV_8UC1``  matrices are supported for now.
+
+    :param mean: Mean value.
+
+    :param stddev: Standard deviation value.
+
+    :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+.. seealso:: :ocv:func:`meanStdDev`
+
+
+
+gpu::rectStdDev
+-------------------
+Computes a standard deviation of integral images.
+
+.. ocv:function:: void gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null())
+
+    :param src: Source image. Only the ``CV_32SC1`` type is supported.
+
+    :param sqr: Squared source image. Only  the ``CV_32FC1`` type is supported.
+
+    :param dst: Destination image with the same type and size as  ``src`` .
+
+    :param rect: Rectangular window.
+
+    :param stream: Stream for the asynchronous version.
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
index 2ebaf2af1..55d163da4 100644
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -222,12 +222,6 @@ CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, doub
 CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b,
                           int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf);
 
-//! computes mean value and standard deviation of all or selected array elements
-//! supports only CV_8UC1 type
-CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
-//! buffered version
-CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
-
 //! computes norm of array
 //! supports NORM_INF, NORM_L1, NORM_L2
 //! supports all matrices except 64F
@@ -275,8 +269,11 @@ CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf);
 //! reduces a matrix to a vector
 CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
 
-//! applies fixed threshold to the image
-CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
+//! computes mean value and standard deviation of all array elements
+//! supports only CV_8UC1 type
+CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
+//! buffered version
+CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
 
 //! computes the standard deviation of integral images
 //! supports only CV_32SC1 source type and CV_32FC1 sqr type
@@ -287,6 +284,9 @@ CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, co
 CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType,
                                const Scalar& value = Scalar(), Stream& stream = Stream::Null());
 
+//! applies fixed threshold to the image
+CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
+
 //! computes the integral image
 //! sum will have CV_32S type, but will contain unsigned int values
 //! supports only CV_8UC1 source type
diff --git a/modules/gpuarithm/perf/perf_arithm.cpp b/modules/gpuarithm/perf/perf_arithm.cpp
new file mode 100644
index 000000000..b553fc212
--- /dev/null
+++ b/modules/gpuarithm/perf/perf_arithm.cpp
@@ -0,0 +1,306 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// GEMM: times cv::gpu::gemm (GPU run) or cv::gemm (CPU run) on three equally sized square matrices
+
+CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T) // NOTE: ALL_GEMM_FLAGS below lists 7 combinations; GEMM_2_T | GEMM_3_T is not among them
+#define ALL_GEMM_FLAGS Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), \
+                              GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
+
+DEF_PARAM_TEST(Sz_Type_Flags, cv::Size, MatType, GemmFlags); // parameter tuple: (matrix size, matrix type, transpose flags)
+
+PERF_TEST_P(Sz_Type_Flags, GEMM,
+            Combine(Values(cv::Size(512, 512), cv::Size(1024, 1024)),
+                    Values(CV_32FC1, CV_32FC2, CV_64FC1),
+                    ALL_GEMM_FLAGS))
+{
+    const cv::Size size = GET_PARAM(0); // 512x512 or 1024x1024
+    const int type = GET_PARAM(1); // CV_32FC1, CV_32FC2 or CV_64FC1
+    const int flags = GET_PARAM(2); // one of the ALL_GEMM_FLAGS combinations
+
+    cv::Mat src1(size, type);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, type);
+    declare.in(src2, WARMUP_RNG);
+
+    cv::Mat src3(size, type);
+    declare.in(src3, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        declare.time(5.0); // GPU branch uses a smaller value than the CPU branch (50.0); presumably a time limit in seconds - confirm
+
+        const cv::gpu::GpuMat d_src1(src1); // device copies are created outside the timed TEST_CYCLE statement
+        const cv::gpu::GpuMat d_src2(src2);
+        const cv::gpu::GpuMat d_src3(src3);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags); // both scalar factors are 1.0
+
+        GPU_SANITY_CHECK(dst, 1e-6); // 1e-6 is presumably the comparison tolerance - confirm
+    }
+    else
+    {
+        declare.time(50.0);
+
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst, flags); // same arguments as the GPU call, on host matrices
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MulSpectrums: times cv::gpu::mulSpectrums (GPU run) or cv::mulSpectrums (CPU run) on two CV_32FC2 matrices
+
+CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags); // parameter tuple: (matrix size, DFT flags)
+
+PERF_TEST_P(Sz_Flags, MulSpectrums,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(0, DftFlags(cv::DFT_ROWS))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int flag = GET_PARAM(1); // either 0 or cv::DFT_ROWS
+
+    cv::Mat a(size, CV_32FC2); // two-channel float inputs
+    cv::Mat b(size, CV_32FC2);
+    declare.in(a, b, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_a(a); // device copies are created outside the timed TEST_CYCLE statement
+        const cv::gpu::GpuMat d_b(b);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::mulSpectrums(d_a, d_b, dst, flag);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::mulSpectrums(a, b, dst, flag); // same inputs and flag as the GPU call, on host matrices
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MulAndScaleSpectrums: times cv::gpu::mulAndScaleSpectrums on two CV_32FC2 matrices (GPU run only)
+
+PERF_TEST_P(Sz, MulAndScaleSpectrums,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam(); // single test parameter: the matrix size
+
+    const float scale = 1.f / size.area(); // scale factor is the reciprocal of size.area()
+
+    cv::Mat src1(size, CV_32FC2);
+    cv::Mat src2(size, CV_32FC2);
+    declare.in(src1,src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src1(src1); // device copies are created outside the timed TEST_CYCLE statement
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false); // flags fixed to DFT_ROWS; trailing false is presumably conjB - confirm
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU call is timed for this test
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Dft: times cv::gpu::dft (GPU run) or cv::dft (CPU run) on a CV_32FC2 matrix
+
+PERF_TEST_P(Sz_Flags, Dft,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))))
+{
+    declare.time(10.0); // set before the branch, so it applies to both the GPU and the CPU run
+
+    const cv::Size size = GET_PARAM(0);
+    const int flag = GET_PARAM(1); // 0, cv::DFT_ROWS or cv::DFT_INVERSE
+
+    cv::Mat src(size, CV_32FC2); // two-channel float input
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // device copy is created outside the timed TEST_CYCLE statement
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::dft(d_src, dst, size, flag); // the GPU call additionally receives the matrix size
+
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); // presumably a relative-error tolerance of 1e-6 - confirm
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::dft(src, dst, flag);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Convolve: times cv::gpu::convolve (GPU run) or cv::filter2D (CPU run, ccorr == false only) on CV_32FC1 data
+
+DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool); // parameter tuple: (image size, template side length, ccorr switch)
+
+PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(17, 27, 32, 64),
+                    Bool()))
+{
+    declare.time(10.0); // set before the branch, so it applies to both the GPU and the CPU run
+
+    const cv::Size size = GET_PARAM(0);
+    const int templ_size = GET_PARAM(1); // 17, 27, 32 or 64
+    const bool ccorr = GET_PARAM(2); // forwarded to cv::gpu::convolve as its ccorr argument
+
+    const cv::Mat image(size, CV_32FC1);
+    const cv::Mat templ(templ_size, templ_size, CV_32FC1); // square template
+    declare.in(image, templ, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat d_image = cv::gpu::createContinuous(size, CV_32FC1); // device buffers come from createContinuous and are then filled via upload
+        d_image.upload(image);
+
+        cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
+        d_templ.upload(templ);
+
+        cv::gpu::GpuMat dst;
+        cv::gpu::ConvolveBuf d_buf; // declared outside TEST_CYCLE, so the same buffer object is passed to every timed call
+
+        TEST_CYCLE() cv::gpu::convolve(d_image, d_templ, dst, ccorr, d_buf);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        if (ccorr)
+            FAIL_NO_CPU(); // presumably leaves the test body here - confirm that FAIL_NO_CPU expands to a returning failure
+
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::filter2D(image, dst, image.depth(), templ); // NOTE(review): cv::filter2D is documented as computing correlation - confirm it is the intended counterpart for ccorr == false
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Integral: times cv::gpu::integralBuffered (GPU run) or cv::integral (CPU run) on a CV_8UC1 matrix
+
+PERF_TEST_P(Sz, Integral,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam(); // single test parameter: the matrix size
+
+    cv::Mat src(size, CV_8UC1); // single-channel 8-bit input
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // device copy is created outside the timed TEST_CYCLE statement
+        cv::gpu::GpuMat dst;
+        cv::gpu::GpuMat d_buf; // declared outside TEST_CYCLE, so the same buffer object is passed to every timed call
+
+        TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::integral(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// IntegralSqr: times cv::gpu::sqrIntegral on a CV_8UC1 matrix (GPU run only)
+
+PERF_TEST_P(Sz, IntegralSqr,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam(); // single test parameter: the matrix size
+
+    cv::Mat src(size, CV_8UC1); // single-channel 8-bit input
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // device copy is created outside the timed TEST_CYCLE statement
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU call is timed for this test
+    }
+}
diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp
index bb1a89d03..eab6d8736 100644
--- a/modules/gpuarithm/perf/perf_core.cpp
+++ b/modules/gpuarithm/perf/perf_core.cpp
@@ -131,1228 +131,6 @@ PERF_TEST_P(Sz_Depth_Cn, Split,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// AddMat
-
-PERF_TEST_P(Sz_Depth, AddMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::add(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::add(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// AddScalar
-
-PERF_TEST_P(Sz_Depth, AddScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::add(d_src, s, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::add(src, s, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// SubtractMat
-
-PERF_TEST_P(Sz_Depth, SubtractMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::subtract(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::subtract(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// SubtractScalar
-
-PERF_TEST_P(Sz_Depth, SubtractScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::subtract(d_src, s, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::subtract(src, s, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MultiplyMat
-
-PERF_TEST_P(Sz_Depth, MultiplyMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::multiply(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-6);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::multiply(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MultiplyScalar
-
-PERF_TEST_P(Sz_Depth, MultiplyScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::multiply(d_src, s, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-6);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::multiply(src, s, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// DivideMat
-
-PERF_TEST_P(Sz_Depth, DivideMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::divide(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-6);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::divide(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// DivideScalar
-
-PERF_TEST_P(Sz_Depth, DivideScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::divide(d_src, s, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-6);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::divide(src, s, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// DivideScalarInv
-
-PERF_TEST_P(Sz_Depth, DivideScalarInv,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::divide(s[0], d_src, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-6);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::divide(s, src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// AbsDiffMat
-
-PERF_TEST_P(Sz_Depth, AbsDiffMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::absdiff(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::absdiff(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// AbsDiffScalar
-
-PERF_TEST_P(Sz_Depth, AbsDiffScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::absdiff(d_src, s, dst);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::absdiff(src, s, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Abs
-
-PERF_TEST_P(Sz_Depth, Abs,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_16S, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::abs(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Sqr
-
-PERF_TEST_P(Sz_Depth, Sqr,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16S, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::sqr(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Sqrt
-
-PERF_TEST_P(Sz_Depth, Sqrt,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16S, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    cv::randu(src, 0, 100000);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::sqrt(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::sqrt(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Log
-
-PERF_TEST_P(Sz_Depth, Log,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16S, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    cv::randu(src, 0, 100000);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::log(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::log(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Exp
-
-PERF_TEST_P(Sz_Depth, Exp,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16S, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    cv::randu(src, 0, 10);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::exp(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::exp(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Pow
-
-DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double);
-
-PERF_TEST_P(Sz_Depth_Power, Pow,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16S, CV_32F),
-                    Values(0.3, 2.0, 2.4)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const double power = GET_PARAM(2);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::pow(d_src, power, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::pow(src, power, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// CompareMat
-
-CV_ENUM(CmpCode, CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE)
-
-DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CmpCode);
-
-PERF_TEST_P(Sz_Depth_Code, CompareMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH,
-                    CmpCode::all()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int cmp_code = GET_PARAM(2);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::compare(d_src1, d_src2, dst, cmp_code);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::compare(src1, src2, dst, cmp_code);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// CompareScalar
-
-PERF_TEST_P(Sz_Depth_Code, CompareScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    ARITHM_MAT_DEPTH,
-                    CmpCode::all()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int cmp_code = GET_PARAM(2);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::compare(d_src, s, dst, cmp_code);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::compare(src, s, dst, cmp_code);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseNot
-
-PERF_TEST_P(Sz_Depth, BitwiseNot,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bitwise_not(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bitwise_not(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseAndMat
-
-PERF_TEST_P(Sz_Depth, BitwiseAndMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bitwise_and(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bitwise_and(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseAndScalar
-
-PERF_TEST_P(Sz_Depth_Cn, BitwiseAndScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-    cv::Scalar_<int> is = s;
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bitwise_and(d_src, is, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bitwise_and(src, is, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseOrMat
-
-PERF_TEST_P(Sz_Depth, BitwiseOrMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bitwise_or(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bitwise_or(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseOrScalar
-
-PERF_TEST_P(Sz_Depth_Cn, BitwiseOrScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-    cv::Scalar_<int> is = s;
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bitwise_or(d_src, is, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bitwise_or(src, is, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseXorMat
-
-PERF_TEST_P(Sz_Depth, BitwiseXorMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bitwise_xor(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bitwise_xor(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseXorScalar
-
-PERF_TEST_P(Sz_Depth_Cn, BitwiseXorScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar s;
-    declare.in(s, WARMUP_RNG);
-    cv::Scalar_<int> is = s;
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bitwise_xor(d_src, is, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bitwise_xor(src, is, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// RShift
-
-PERF_TEST_P(Sz_Depth_Cn, RShift,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    const cv::Scalar_<int> val = cv::Scalar_<int>::all(4);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::rshift(d_src, val, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// LShift
-
-PERF_TEST_P(Sz_Depth_Cn, LShift,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    const cv::Scalar_<int> val = cv::Scalar_<int>::all(4);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::lshift(d_src, val, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MinMat
-
-PERF_TEST_P(Sz_Depth, MinMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::min(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::min(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MinScalar
-
-PERF_TEST_P(Sz_Depth, MinScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar val;
-    declare.in(val, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::min(d_src, val[0], dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::min(src, val[0], dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MaxMat
-
-PERF_TEST_P(Sz_Depth, MaxMat,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src1(size, depth);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::max(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::max(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MaxScalar
-
-PERF_TEST_P(Sz_Depth, MaxScalar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar val;
-    declare.in(val, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::max(d_src, val[0], dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::max(src, val[0], dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// AddWeighted
-
-DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth);
-
-PERF_TEST_P(Sz_3Depth, AddWeighted,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
-                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
-                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth1 = GET_PARAM(1);
-    const int depth2 = GET_PARAM(2);
-    const int dst_depth = GET_PARAM(3);
-
-    cv::Mat src1(size, depth1);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, depth2);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::addWeighted(d_src1, 0.5, d_src2, 0.5, 10.0, dst, dst_depth);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst, dst_depth);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// GEMM
-
-CV_FLAGS(GemmFlags, 0, GEMM_1_T, GEMM_2_T, GEMM_3_T)
-#define ALL_GEMM_FLAGS Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)cv::GEMM_3_T, \
-                              (int)cv::GEMM_1_T | cv::GEMM_2_T, (int)cv::GEMM_1_T | cv::GEMM_3_T, \
-                              (int)cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T)
-
-DEF_PARAM_TEST(Sz_Type_Flags, cv::Size, MatType, GemmFlags);
-
-PERF_TEST_P(Sz_Type_Flags, GEMM,
-            Combine(Values(cv::Size(512, 512), cv::Size(1024, 1024)),
-                    Values(CV_32FC1, CV_32FC2, CV_64FC1),
-                    ALL_GEMM_FLAGS))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int type = GET_PARAM(1);
-    const int flags = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, type);
-    declare.in(src2, WARMUP_RNG);
-
-    cv::Mat src3(size, type);
-    declare.in(src3, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        declare.time(5.0);
-
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        const cv::gpu::GpuMat d_src3(src3);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags);
-
-        GPU_SANITY_CHECK(dst, 1e-6);
-    }
-    else
-    {
-        declare.time(50.0);
-
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst, flags);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // Transpose
 
@@ -1498,766 +276,6 @@ PERF_TEST_P(Sz_Type, LutMultiChannel,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// MagnitudeComplex
-
-PERF_TEST_P(Sz, MagnitudeComplex,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_32FC2);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::magnitude(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat xy[2];
-        cv::split(src, xy);
-
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::magnitude(xy[0], xy[1], dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MagnitudeSqrComplex
-
-PERF_TEST_P(Sz, MagnitudeSqrComplex,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_32FC2);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Magnitude
-
-PERF_TEST_P(Sz, Magnitude,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src1(size, CV_32FC1);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, CV_32FC1);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::magnitude(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::magnitude(src1, src2, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MagnitudeSqr
-
-PERF_TEST_P(Sz, MagnitudeSqr,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src1(size, CV_32FC1);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, CV_32FC1);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src1, d_src2, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Phase
-
-DEF_PARAM_TEST(Sz_AngleInDegrees, cv::Size, bool);
-
-PERF_TEST_P(Sz_AngleInDegrees, Phase,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Bool()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const bool angleInDegrees = GET_PARAM(1);
-
-    cv::Mat src1(size, CV_32FC1);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, CV_32FC1);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::phase(d_src1, d_src2, dst, angleInDegrees);
-
-        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::phase(src1, src2, dst, angleInDegrees);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// CartToPolar
-
-PERF_TEST_P(Sz_AngleInDegrees, CartToPolar,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Bool()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const bool angleInDegrees = GET_PARAM(1);
-
-    cv::Mat src1(size, CV_32FC1);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, CV_32FC1);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat magnitude;
-        cv::gpu::GpuMat angle;
-
-        TEST_CYCLE() cv::gpu::cartToPolar(d_src1, d_src2, magnitude, angle, angleInDegrees);
-
-        GPU_SANITY_CHECK(magnitude);
-        GPU_SANITY_CHECK(angle, 1e-6, ERROR_RELATIVE);
-    }
-    else
-    {
-        cv::Mat magnitude;
-        cv::Mat angle;
-
-        TEST_CYCLE() cv::cartToPolar(src1, src2, magnitude, angle, angleInDegrees);
-
-        CPU_SANITY_CHECK(magnitude);
-        CPU_SANITY_CHECK(angle);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// PolarToCart
-
-PERF_TEST_P(Sz_AngleInDegrees, PolarToCart,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Bool()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const bool angleInDegrees = GET_PARAM(1);
-
-    cv::Mat magnitude(size, CV_32FC1);
-    declare.in(magnitude, WARMUP_RNG);
-
-    cv::Mat angle(size, CV_32FC1);
-    declare.in(angle, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_magnitude(magnitude);
-        const cv::gpu::GpuMat d_angle(angle);
-        cv::gpu::GpuMat x;
-        cv::gpu::GpuMat y;
-
-        TEST_CYCLE() cv::gpu::polarToCart(d_magnitude, d_angle, x, y, angleInDegrees);
-
-        GPU_SANITY_CHECK(x);
-        GPU_SANITY_CHECK(y);
-    }
-    else
-    {
-        cv::Mat x;
-        cv::Mat y;
-
-        TEST_CYCLE() cv::polarToCart(magnitude, angle, x, y, angleInDegrees);
-
-        CPU_SANITY_CHECK(x);
-        CPU_SANITY_CHECK(y);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MeanStdDev
-
-PERF_TEST_P(Sz, MeanStdDev,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
-        cv::Scalar gpu_mean;
-        cv::Scalar gpu_stddev;
-
-        TEST_CYCLE() cv::gpu::meanStdDev(d_src, gpu_mean, gpu_stddev, d_buf);
-
-        SANITY_CHECK(gpu_mean);
-        SANITY_CHECK(gpu_stddev);
-    }
-    else
-    {
-        cv::Scalar cpu_mean;
-        cv::Scalar cpu_stddev;
-
-        TEST_CYCLE() cv::meanStdDev(src, cpu_mean, cpu_stddev);
-
-        SANITY_CHECK(cpu_mean);
-        SANITY_CHECK(cpu_stddev);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Norm
-
-DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType);
-
-PERF_TEST_P(Sz_Depth_Norm, Norm,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32S, CV_32F),
-                    Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int normType = GET_PARAM(2);
-
-    cv::Mat src(size, depth);
-    if (depth == CV_8U)
-        cv::randu(src, 0, 254);
-    else
-        declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
-        double gpu_dst;
-
-        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src, normType, d_buf);
-
-        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
-    }
-    else
-    {
-        double cpu_dst;
-
-        TEST_CYCLE() cpu_dst = cv::norm(src, normType);
-
-        SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// NormDiff
-
-DEF_PARAM_TEST(Sz_Norm, cv::Size, NormType);
-
-PERF_TEST_P(Sz_Norm, NormDiff,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int normType = GET_PARAM(1);
-
-    cv::Mat src1(size, CV_8UC1);
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat src2(size, CV_8UC1);
-    declare.in(src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        double gpu_dst;
-
-        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, normType);
-
-        SANITY_CHECK(gpu_dst);
-
-    }
-    else
-    {
-        double cpu_dst;
-
-        TEST_CYCLE() cpu_dst = cv::norm(src1, src2, normType);
-
-        SANITY_CHECK(cpu_dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Sum
-
-PERF_TEST_P(Sz_Depth_Cn, Sum,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
-        cv::Scalar gpu_dst;
-
-        TEST_CYCLE() gpu_dst = cv::gpu::sum(d_src, d_buf);
-
-        SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE);
-    }
-    else
-    {
-        cv::Scalar cpu_dst;
-
-        TEST_CYCLE() cpu_dst = cv::sum(src);
-
-        SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// SumAbs
-
-PERF_TEST_P(Sz_Depth_Cn, SumAbs,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
-        cv::Scalar gpu_dst;
-
-        TEST_CYCLE() gpu_dst = cv::gpu::absSum(d_src, d_buf);
-
-        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// SumSqr
-
-PERF_TEST_P(Sz_Depth_Cn, SumSqr,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values<MatDepth>(CV_8U, CV_16U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
-        cv::Scalar gpu_dst;
-
-        TEST_CYCLE() gpu_dst = cv::gpu::sqrSum(d_src, d_buf);
-
-        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MinMax
-
-PERF_TEST_P(Sz_Depth, MinMax,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    if (depth == CV_8U)
-        cv::randu(src, 0, 254);
-    else
-        declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
-        double gpu_minVal, gpu_maxVal;
-
-        TEST_CYCLE() cv::gpu::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::gpu::GpuMat(), d_buf);
-
-        SANITY_CHECK(gpu_minVal, 1e-10);
-        SANITY_CHECK(gpu_maxVal, 1e-10);
-    }
-    else
-    {
-        double cpu_minVal, cpu_maxVal;
-
-        TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal);
-
-        SANITY_CHECK(cpu_minVal);
-        SANITY_CHECK(cpu_maxVal);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MinMaxLoc
-
-PERF_TEST_P(Sz_Depth, MinMaxLoc,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    if (depth == CV_8U)
-        cv::randu(src, 0, 254);
-    else
-        declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_valbuf, d_locbuf;
-        double gpu_minVal, gpu_maxVal;
-        cv::Point gpu_minLoc, gpu_maxLoc;
-
-        TEST_CYCLE() cv::gpu::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc, cv::gpu::GpuMat(), d_valbuf, d_locbuf);
-
-        SANITY_CHECK(gpu_minVal, 1e-10);
-        SANITY_CHECK(gpu_maxVal, 1e-10);
-    }
-    else
-    {
-        double cpu_minVal, cpu_maxVal;
-        cv::Point cpu_minLoc, cpu_maxLoc;
-
-        TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal, &cpu_minLoc, &cpu_maxLoc);
-
-        SANITY_CHECK(cpu_minVal);
-        SANITY_CHECK(cpu_maxVal);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// CountNonZero
-
-PERF_TEST_P(Sz_Depth, CountNonZero,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
-        int gpu_dst = 0;
-
-        TEST_CYCLE() gpu_dst = cv::gpu::countNonZero(d_src, d_buf);
-
-        SANITY_CHECK(gpu_dst);
-    }
-    else
-    {
-        int cpu_dst = 0;
-
-        TEST_CYCLE() cpu_dst = cv::countNonZero(src);
-
-        SANITY_CHECK(cpu_dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Reduce
-
-enum {Rows = 0, Cols = 1};
-CV_ENUM(ReduceCode, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN)
-CV_ENUM(ReduceDim, Rows, Cols)
-
-DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim);
-
-PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Reduce,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_16S, CV_32F),
-                    Values(1, 2, 3, 4),
-                    ReduceCode::all(),
-                    ReduceDim::all()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int reduceOp = GET_PARAM(3);
-    const int dim = GET_PARAM(4);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::reduce(d_src, dst, dim, reduceOp);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::reduce(src, dst, dim, reduceOp);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-//////////////////////////////////////////////////////////////////////
-// Normalize
-
-DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType);
-
-PERF_TEST_P(Sz_Depth_NormType, Normalize,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
-                    Values(NormType(cv::NORM_INF),
-                           NormType(cv::NORM_L1),
-                           NormType(cv::NORM_L2),
-                           NormType(cv::NORM_MINMAX))))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int type = GET_PARAM(1);
-    const int norm_type = GET_PARAM(2);
-
-    const double alpha = 1;
-    const double beta = 0;
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_norm_buf, d_cvt_buf;
-
-        TEST_CYCLE() cv::gpu::normalize(d_src, dst, alpha, beta, norm_type, type, cv::gpu::GpuMat(), d_norm_buf, d_cvt_buf);
-
-        GPU_SANITY_CHECK(dst, 1e-6);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::normalize(src, dst, alpha, beta, norm_type, type);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MulSpectrums
-
-CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
-
-DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags);
-
-PERF_TEST_P(Sz_Flags, MulSpectrums,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(0, DftFlags(cv::DFT_ROWS))))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int flag = GET_PARAM(1);
-
-    cv::Mat a(size, CV_32FC2);
-    cv::Mat b(size, CV_32FC2);
-    declare.in(a, b, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_a(a);
-        const cv::gpu::GpuMat d_b(b);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::mulSpectrums(d_a, d_b, dst, flag);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::mulSpectrums(a, b, dst, flag);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MulAndScaleSpectrums
-
-PERF_TEST_P(Sz, MulAndScaleSpectrums,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    const float scale = 1.f / size.area();
-
-    cv::Mat src1(size, CV_32FC2);
-    cv::Mat src2(size, CV_32FC2);
-    declare.in(src1,src2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Dft
-
-PERF_TEST_P(Sz_Flags, Dft,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))))
-{
-    declare.time(10.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int flag = GET_PARAM(1);
-
-    cv::Mat src(size, CV_32FC2);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::dft(d_src, dst, size, flag);
-
-        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::dft(src, dst, flag);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // CopyMakeBorder
 
@@ -2297,147 +315,3 @@ PERF_TEST_P(Sz_Depth_Cn_Border, CopyMakeBorder,
         CPU_SANITY_CHECK(dst);
     }
 }
-
-//////////////////////////////////////////////////////////////////////
-// Integral
-
-PERF_TEST_P(Sz, Integral,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_buf;
-
-        TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::integral(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// IntegralSqr
-
-PERF_TEST_P(Sz, IntegralSqr,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Convolve
-
-DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool);
-
-PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(17, 27, 32, 64),
-                    Bool()))
-{
-    declare.time(10.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int templ_size = GET_PARAM(1);
-    const bool ccorr = GET_PARAM(2);
-
-    const cv::Mat image(size, CV_32FC1);
-    const cv::Mat templ(templ_size, templ_size, CV_32FC1);
-    declare.in(image, templ, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_image = cv::gpu::createContinuous(size, CV_32FC1);
-        d_image.upload(image);
-
-        cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
-        d_templ.upload(templ);
-
-        cv::gpu::GpuMat dst;
-        cv::gpu::ConvolveBuf d_buf;
-
-        TEST_CYCLE() cv::gpu::convolve(d_image, d_templ, dst, ccorr, d_buf);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        if (ccorr)
-            FAIL_NO_CPU();
-
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::filter2D(image, dst, image.depth(), templ);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Threshold
-
-CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
-#define ALL_THRESH_OPS ValuesIn(ThreshOp::all())
-
-DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp);
-
-PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-            Values(CV_8U, CV_16U, CV_32F, CV_64F),
-            ALL_THRESH_OPS))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int threshOp = GET_PARAM(2);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::threshold(d_src, dst, 100.0, 255.0, threshOp);
-
-        GPU_SANITY_CHECK(dst, 1e-10);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::threshold(src, dst, 100.0, 255.0, threshOp);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
diff --git a/modules/gpuarithm/perf/perf_element_operations.cpp b/modules/gpuarithm/perf/perf_element_operations.cpp
new file mode 100644
index 000000000..1370da580
--- /dev/null
+++ b/modules/gpuarithm/perf/perf_element_operations.cpp
@@ -0,0 +1,1497 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+#define ARITHM_MAT_DEPTH Values(CV_8U, CV_16U, CV_32F, CV_64F)
+
+//////////////////////////////////////////////////////////////////////
+// AddMat
+// Benchmarks the per-element sum of two equally sized matrices (cv::gpu::add vs. cv::add).
+PERF_TEST_P(Sz_Depth, AddMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::add(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::add(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// AddScalar
+// Benchmarks adding a cv::Scalar operand to a matrix (cv::gpu::add vs. cv::add).
+PERF_TEST_P(Sz_Depth, AddScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::add(d_src, s, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::add(src, s, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SubtractMat
+// Benchmarks the per-element difference of two equally sized matrices (cv::gpu::subtract vs. cv::subtract).
+PERF_TEST_P(Sz_Depth, SubtractMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::subtract(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::subtract(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SubtractScalar
+// Benchmarks subtracting a cv::Scalar operand from a matrix (cv::gpu::subtract vs. cv::subtract).
+PERF_TEST_P(Sz_Depth, SubtractScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::subtract(d_src, s, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::subtract(src, s, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MultiplyMat
+// Benchmarks the per-element product of two equally sized matrices (cv::gpu::multiply vs. cv::multiply).
+PERF_TEST_P(Sz_Depth, MultiplyMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::multiply(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-6);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::multiply(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MultiplyScalar
+// Benchmarks multiplying a matrix by a cv::Scalar operand (cv::gpu::multiply vs. cv::multiply).
+PERF_TEST_P(Sz_Depth, MultiplyScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::multiply(d_src, s, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-6);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::multiply(src, s, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// DivideMat
+// Benchmarks the per-element quotient of two equally sized matrices (cv::gpu::divide vs. cv::divide).
+PERF_TEST_P(Sz_Depth, DivideMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::divide(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-6);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::divide(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// DivideScalar
+// Benchmarks dividing a matrix by a cv::Scalar operand (cv::gpu::divide vs. cv::divide).
+PERF_TEST_P(Sz_Depth, DivideScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::divide(d_src, s, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-6);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::divide(src, s, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// DivideScalarInv
+// Benchmarks the scalar-first divide overloads: cv::gpu::divide(s[0], src, dst) vs. cv::divide(s, src, dst).
+PERF_TEST_P(Sz_Depth, DivideScalarInv,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+        // Only component 0 of the scalar is handed to the GPU call.
+        TEST_CYCLE() cv::gpu::divide(s[0], d_src, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-6);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+        // NOTE(review): the whole Scalar is passed here, s[0] on the GPU path -- confirm they are equivalent.
+        TEST_CYCLE() cv::divide(s, src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// AbsDiffMat
+// Benchmarks absdiff on two equally sized matrices (cv::gpu::absdiff vs. cv::absdiff).
+PERF_TEST_P(Sz_Depth, AbsDiffMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::absdiff(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::absdiff(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// AbsDiffScalar
+// Benchmarks absdiff between a matrix and a cv::Scalar operand (cv::gpu::absdiff vs. cv::absdiff).
+PERF_TEST_P(Sz_Depth, AbsDiffScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::absdiff(d_src, s, dst);
+
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::absdiff(src, s, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Abs
+// Benchmarks cv::gpu::abs on CV_16S and CV_32F input; no CPU counterpart is timed.
+PERF_TEST_P(Sz_Depth, Abs,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_16S, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::abs(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // non-GPU run: this branch only invokes FAIL_NO_CPU()
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Sqr
+// Benchmarks cv::gpu::sqr on CV_8U, CV_16S and CV_32F input; no CPU counterpart is timed.
+PERF_TEST_P(Sz_Depth, Sqr,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::sqr(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // non-GPU run: this branch only invokes FAIL_NO_CPU()
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Sqrt
+// Benchmarks the per-element square root (cv::gpu::sqrt vs. cv::sqrt).
+PERF_TEST_P(Sz_Depth, Sqrt,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    cv::randu(src, 0, 100000); // filled directly with bounds 0 and 100000 (presumably to keep inputs non-negative)
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::sqrt(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+        // NOTE(review): cv::sqrt may reject the CV_8U/CV_16S depths listed above -- confirm this branch runs for them.
+        TEST_CYCLE() cv::sqrt(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Log
+// Benchmarks the per-element logarithm (cv::gpu::log vs. cv::log).
+PERF_TEST_P(Sz_Depth, Log,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    cv::randu(src, 0, 100000); // filled directly with bounds 0 and 100000 (presumably to keep inputs non-negative)
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::log(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+        // NOTE(review): cv::log may reject the CV_8U/CV_16S depths listed above -- confirm this branch runs for them.
+        TEST_CYCLE() cv::log(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Exp
+// Benchmarks the per-element exponential (cv::gpu::exp vs. cv::exp).
+PERF_TEST_P(Sz_Depth, Exp,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    cv::randu(src, 0, 10); // filled directly with bounds 0 and 10 (presumably to keep exp() results small)
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::exp(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+        // NOTE(review): cv::exp may reject the CV_8U/CV_16S depths listed above -- confirm this branch runs for them.
+        TEST_CYCLE() cv::exp(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Pow
+// Benchmarks raising each element to the powers 0.3, 2.0 and 2.4 (cv::gpu::pow vs. cv::pow).
+DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double);
+
+PERF_TEST_P(Sz_Depth_Power, Pow,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F),
+                    Values(0.3, 2.0, 2.4)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const double power = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::pow(d_src, power, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+        // NOTE(review): cv::pow with the fractional powers may reject CV_8U/CV_16S input -- confirm.
+        TEST_CYCLE() cv::pow(src, power, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// CompareMat
+// Benchmarks matrix-vs-matrix comparison for every CmpCode value (cv::gpu::compare vs. cv::compare).
+CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
+
+DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CmpCode);
+
+PERF_TEST_P(Sz_Depth_Code, CompareMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH,
+                    CmpCode::all()))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int cmp_code = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::compare(d_src1, d_src2, dst, cmp_code);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::compare(src1, src2, dst, cmp_code);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// CompareScalar
+// Benchmarks matrix-vs-cv::Scalar comparison for every CmpCode value (cv::gpu::compare vs. cv::compare).
+PERF_TEST_P(Sz_Depth_Code, CompareScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH,
+                    CmpCode::all()))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int cmp_code = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::compare(d_src, s, dst, cmp_code);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::compare(src, s, dst, cmp_code);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseNot
+// Benchmarks bitwise_not on CV_8U, CV_16U and CV_32S input (cv::gpu::bitwise_not vs. cv::bitwise_not).
+PERF_TEST_P(Sz_Depth, BitwiseNot,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bitwise_not(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bitwise_not(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseAndMat
+// Benchmarks bitwise_and on two equally sized integer matrices (cv::gpu::bitwise_and vs. cv::bitwise_and).
+PERF_TEST_P(Sz_Depth, BitwiseAndMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bitwise_and(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bitwise_and(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseAndScalar
+// Benchmarks bitwise_and between a 1/3/4-channel integer matrix and an integer scalar.
+PERF_TEST_P(Sz_Depth_Cn, BitwiseAndScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+    cv::Scalar_<int> is = s; // integer copy of s; both branches pass this one, not s
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bitwise_and(d_src, is, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bitwise_and(src, is, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseOrMat
+// Benchmarks bitwise_or on two equally sized integer matrices (cv::gpu::bitwise_or vs. cv::bitwise_or).
+PERF_TEST_P(Sz_Depth, BitwiseOrMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bitwise_or(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bitwise_or(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseOrScalar
+// Benchmarks bitwise_or between a 1/3/4-channel integer matrix and an integer scalar.
+PERF_TEST_P(Sz_Depth_Cn, BitwiseOrScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+    cv::Scalar_<int> is = s; // integer copy of s; both branches pass this one, not s
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bitwise_or(d_src, is, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bitwise_or(src, is, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseXorMat
+// Benchmarks bitwise_xor on two equally sized integer matrices (cv::gpu::bitwise_xor vs. cv::bitwise_xor).
+PERF_TEST_P(Sz_Depth, BitwiseXorMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bitwise_xor(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bitwise_xor(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseXorScalar
+// Benchmarks bitwise_xor between a 1/3/4-channel integer matrix and an integer scalar.
+PERF_TEST_P(Sz_Depth_Cn, BitwiseXorScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+    cv::Scalar_<int> is = s; // integer copy of s; both branches pass this one, not s
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bitwise_xor(d_src, is, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bitwise_xor(src, is, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// RShift
+// Benchmarks cv::gpu::rshift on 1/3/4-channel integer matrices; no CPU counterpart is timed.
+PERF_TEST_P(Sz_Depth_Cn, RShift,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    const cv::Scalar_<int> val = cv::Scalar_<int>::all(4); // 4 in every component; second argument of rshift
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::rshift(d_src, val, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // non-GPU run: this branch only invokes FAIL_NO_CPU()
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// LShift
+// Benchmarks cv::gpu::lshift on 1/3/4-channel integer matrices; no CPU counterpart is timed.
+PERF_TEST_P(Sz_Depth_Cn, LShift,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    const cv::Scalar_<int> val = cv::Scalar_<int>::all(4); // 4 in every component; second argument of lshift
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::lshift(d_src, val, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // non-GPU run: this branch only invokes FAIL_NO_CPU()
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MinMat
+// Benchmarks the per-element minimum of two equally sized matrices (cv::gpu::min vs. cv::min).
+PERF_TEST_P(Sz_Depth, MinMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::min(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::min(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MinScalar
+// Benchmarks the minimum of a matrix and a single value, val[0] (cv::gpu::min vs. cv::min).
+PERF_TEST_P(Sz_Depth, MinScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar val; // only component 0 is read below, in both branches
+    declare.in(val, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::min(d_src, val[0], dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::min(src, val[0], dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MaxMat
+// Benchmarks the per-element maximum of two equally sized matrices (cv::gpu::max vs. cv::max).
+PERF_TEST_P(Sz_Depth, MaxMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src1(size, depth);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::max(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::max(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MaxScalar
+// Benchmarks the maximum of a matrix and a single value, val[0] (cv::gpu::max vs. cv::max).
+PERF_TEST_P(Sz_Depth, MaxScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    cv::Scalar val; // only component 0 is read below, in both branches
+    declare.in(val, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::max(d_src, val[0], dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::max(src, val[0], dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// AddWeighted
+// Benchmarks addWeighted with fixed constants 0.5, 0.5, 10.0; both input depths and the dst depth vary.
+DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth);
+
+PERF_TEST_P(Sz_3Depth, AddWeighted,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth1 = GET_PARAM(1);
+    const int depth2 = GET_PARAM(2);
+    const int dst_depth = GET_PARAM(3);
+
+    cv::Mat src1(size, depth1);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, depth2);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::addWeighted(d_src1, 0.5, d_src2, 0.5, 10.0, dst, dst_depth);
+
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst, dst_depth);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MagnitudeComplex
+// Benchmarks magnitude of a 2-channel CV_32FC2 matrix (cv::gpu::magnitude vs. cv::magnitude on split planes).
+PERF_TEST_P(Sz, MagnitudeComplex,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src(size, CV_32FC2);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation: takes the 2-channel matrix directly
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::magnitude(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat xy[2];
+        cv::split(src, xy);
+        // The CPU call takes two separate planes, so the split is done before the TEST_CYCLE() statement.
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::magnitude(xy[0], xy[1], dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MagnitudeSqrComplex
+// Benchmarks cv::gpu::magnitudeSqr on a 2-channel CV_32FC2 matrix; no CPU counterpart is timed.
+PERF_TEST_P(Sz, MagnitudeSqrComplex,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src(size, CV_32FC2);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // non-GPU run: this branch only invokes FAIL_NO_CPU()
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Magnitude
+// Benchmarks magnitude of two single-channel CV_32FC1 matrices (cv::gpu::magnitude vs. cv::magnitude).
+PERF_TEST_P(Sz, Magnitude,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src1(size, CV_32FC1);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, CV_32FC1);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::magnitude(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::magnitude(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MagnitudeSqr
+// Benchmarks cv::gpu::magnitudeSqr on two single-channel CV_32FC1 matrices; no CPU counterpart is timed.
+PERF_TEST_P(Sz, MagnitudeSqr,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src1(size, CV_32FC1);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, CV_32FC1);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src1, d_src2, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else // non-GPU run: this branch only invokes FAIL_NO_CPU()
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Phase
+// Benchmarks phase on two CV_32FC1 matrices with angleInDegrees both false and true (cv::gpu::phase vs. cv::phase).
+DEF_PARAM_TEST(Sz_AngleInDegrees, cv::Size, bool);
+
+PERF_TEST_P(Sz_AngleInDegrees, Phase,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Bool()))
+{
+    const cv::Size size = GET_PARAM(0);
+    const bool angleInDegrees = GET_PARAM(1);
+
+    cv::Mat src1(size, CV_32FC1);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, CV_32FC1);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU()) // GPU implementation
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::phase(d_src1, d_src2, dst, angleInDegrees);
+
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
+    }
+    else // CPU baseline
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::phase(src1, src2, dst, angleInDegrees);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// CartToPolar
+// Runs cv::gpu::cartToPolar against cv::cartToPolar on two CV_32FC1 inputs, for both angle units.
+PERF_TEST_P(Sz_AngleInDegrees, CartToPolar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Bool()))
+{
+    const cv::Size size = GET_PARAM(0);
+    const bool angleInDegrees = GET_PARAM(1);
+
+    cv::Mat src1(size, CV_32FC1);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, CV_32FC1);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat magnitude;
+        cv::gpu::GpuMat angle;
+
+        TEST_CYCLE() cv::gpu::cartToPolar(d_src1, d_src2, magnitude, angle, angleInDegrees);
+        // Only the angle output gets an explicit 1e-6 / ERROR_RELATIVE tolerance.
+        GPU_SANITY_CHECK(magnitude);
+        GPU_SANITY_CHECK(angle, 1e-6, ERROR_RELATIVE);
+    }
+    else
+    {
+        cv::Mat magnitude;
+        cv::Mat angle;
+
+        TEST_CYCLE() cv::cartToPolar(src1, src2, magnitude, angle, angleInDegrees);
+
+        CPU_SANITY_CHECK(magnitude);
+        CPU_SANITY_CHECK(angle);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// PolarToCart
+// Runs cv::gpu::polarToCart against cv::polarToCart on CV_32FC1 magnitude/angle inputs.
+PERF_TEST_P(Sz_AngleInDegrees, PolarToCart,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Bool()))
+{
+    const cv::Size size = GET_PARAM(0);
+    const bool angleInDegrees = GET_PARAM(1);
+
+    cv::Mat magnitude(size, CV_32FC1);
+    declare.in(magnitude, WARMUP_RNG);
+
+    cv::Mat angle(size, CV_32FC1);
+    declare.in(angle, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_magnitude(magnitude);
+        const cv::gpu::GpuMat d_angle(angle);
+        cv::gpu::GpuMat x;
+        cv::gpu::GpuMat y;
+        // Inputs are wrapped in GpuMat before TEST_CYCLE(); x and y are the two outputs being checked.
+        TEST_CYCLE() cv::gpu::polarToCart(d_magnitude, d_angle, x, y, angleInDegrees);
+
+        GPU_SANITY_CHECK(x);
+        GPU_SANITY_CHECK(y);
+    }
+    else
+    {
+        cv::Mat x;
+        cv::Mat y;
+
+        TEST_CYCLE() cv::polarToCart(magnitude, angle, x, y, angleInDegrees);
+
+        CPU_SANITY_CHECK(x);
+        CPU_SANITY_CHECK(y);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Threshold
+// Runs cv::gpu::threshold against cv::threshold for each size, depth and ThreshOp value.
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+
+DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp);
+
+PERF_TEST_P(Sz_Depth_Op, Threshold,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+            Values(CV_8U, CV_16U, CV_32F, CV_64F),
+            ThreshOp::all()))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int threshOp = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+        // Same fixed arguments (100.0, 255.0) are passed to the GPU and CPU calls.
+        TEST_CYCLE() cv::gpu::threshold(d_src, dst, 100.0, 255.0, threshOp);
+
+        GPU_SANITY_CHECK(dst, 1e-10);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::threshold(src, dst, 100.0, 255.0, threshOp);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
diff --git a/modules/gpuarithm/perf/perf_reductions.cpp b/modules/gpuarithm/perf/perf_reductions.cpp
new file mode 100644
index 000000000..8d73180dc
--- /dev/null
+++ b/modules/gpuarithm/perf/perf_reductions.cpp
@@ -0,0 +1,466 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// Norm
+// Runs cv::gpu::norm (with a reusable buffer) against cv::norm for each size, depth and norm type.
+DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType);
+
+PERF_TEST_P(Sz_Depth_Norm, Norm,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S, CV_32F),
+                    Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int normType = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    if (depth == CV_8U)
+        cv::randu(src, 0, 254); // CV_8U input is filled with cv::randu instead of WARMUP_RNG
+    else
+        declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_buf;
+        double gpu_dst = 0; // initialised (as in CountNonZero) so SANITY_CHECK never reads an indeterminate value
+
+        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src, normType, d_buf);
+
+        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
+    }
+    else
+    {
+        double cpu_dst = 0; // initialised for the same reason as gpu_dst
+
+        TEST_CYCLE() cpu_dst = cv::norm(src, normType);
+
+        SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// NormDiff
+// Runs the two-matrix cv::gpu::norm overload against cv::norm on a pair of CV_8UC1 matrices.
+DEF_PARAM_TEST(Sz_Norm, cv::Size, NormType);
+
+PERF_TEST_P(Sz_Norm, NormDiff,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int normType = GET_PARAM(1);
+
+    cv::Mat src1(size, CV_8UC1);
+    declare.in(src1, WARMUP_RNG);
+
+    cv::Mat src2(size, CV_8UC1);
+    declare.in(src2, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        double gpu_dst = 0; // initialised (as in CountNonZero) so SANITY_CHECK never reads an indeterminate value
+
+        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, normType);
+
+        SANITY_CHECK(gpu_dst);
+
+    }
+    else
+    {
+        double cpu_dst = 0; // initialised for the same reason as gpu_dst
+
+        TEST_CYCLE() cpu_dst = cv::norm(src1, src2, normType);
+
+        SANITY_CHECK(cpu_dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Sum
+// Runs cv::gpu::sum (with a reusable buffer) against cv::sum for each size, depth and channel count.
+PERF_TEST_P(Sz_Depth_Cn, Sum,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_buf;
+        cv::Scalar gpu_dst;
+
+        TEST_CYCLE() gpu_dst = cv::gpu::sum(d_src, d_buf);
+        // NOTE(review): GPU tolerance (1e-5) is looser than the CPU branch (1e-6) - confirm this is intended.
+        SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE);
+    }
+    else
+    {
+        cv::Scalar cpu_dst;
+
+        TEST_CYCLE() cpu_dst = cv::sum(src);
+
+        SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SumAbs
+// Times cv::gpu::absSum (with a reusable buffer); the CPU path is FAIL_NO_CPU().
+PERF_TEST_P(Sz_Depth_Cn, SumAbs,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_buf;
+        cv::Scalar gpu_dst;
+        // d_buf is declared outside TEST_CYCLE(), so the same GpuMat is passed on every iteration.
+        TEST_CYCLE() gpu_dst = cv::gpu::absSum(d_src, d_buf);
+
+        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is run for this case
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SumSqr
+// Times cv::gpu::sqrSum (with a reusable buffer); the CPU path is FAIL_NO_CPU().
+PERF_TEST_P(Sz_Depth_Cn, SumSqr,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_buf;
+        cv::Scalar gpu_dst;
+        // d_buf is declared outside TEST_CYCLE(), so the same GpuMat is passed on every iteration.
+        TEST_CYCLE() gpu_dst = cv::gpu::sqrSum(d_src, d_buf);
+
+        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is run for this case
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MinMax
+// Runs cv::gpu::minMax (empty mask, reusable buffer) against cv::minMaxLoc for each size and depth.
+PERF_TEST_P(Sz_Depth, MinMax,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    if (depth == CV_8U)
+        cv::randu(src, 0, 254); // CV_8U input is filled with cv::randu instead of WARMUP_RNG
+    else
+        declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_buf;
+        double gpu_minVal = 0, gpu_maxVal = 0; // initialised so SANITY_CHECK never reads an indeterminate value
+
+        TEST_CYCLE() cv::gpu::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::gpu::GpuMat(), d_buf);
+
+        SANITY_CHECK(gpu_minVal, 1e-10);
+        SANITY_CHECK(gpu_maxVal, 1e-10);
+    }
+    else
+    {
+        double cpu_minVal = 0, cpu_maxVal = 0; // initialised for the same reason as the GPU results
+
+        TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal);
+
+        SANITY_CHECK(cpu_minVal);
+        SANITY_CHECK(cpu_maxVal);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MinMaxLoc
+// Runs cv::gpu::minMaxLoc (empty mask, reusable buffers) against cv::minMaxLoc for each size and depth.
+PERF_TEST_P(Sz_Depth, MinMaxLoc,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    if (depth == CV_8U)
+        cv::randu(src, 0, 254); // CV_8U input is filled with cv::randu instead of WARMUP_RNG
+    else
+        declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_valbuf, d_locbuf;
+        double gpu_minVal = 0, gpu_maxVal = 0; // initialised so SANITY_CHECK never reads an indeterminate value
+        cv::Point gpu_minLoc, gpu_maxLoc;
+
+        TEST_CYCLE() cv::gpu::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc, cv::gpu::GpuMat(), d_valbuf, d_locbuf);
+        // Only the min/max values are sanity-checked; the returned locations are not.
+        SANITY_CHECK(gpu_minVal, 1e-10);
+        SANITY_CHECK(gpu_maxVal, 1e-10);
+    }
+    else
+    {
+        double cpu_minVal = 0, cpu_maxVal = 0; // initialised for the same reason as the GPU results
+        cv::Point cpu_minLoc, cpu_maxLoc;
+
+        TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal, &cpu_minLoc, &cpu_maxLoc);
+
+        SANITY_CHECK(cpu_minVal);
+        SANITY_CHECK(cpu_maxVal);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// CountNonZero
+// Runs cv::gpu::countNonZero (with a reusable buffer) against cv::countNonZero for each size and depth.
+PERF_TEST_P(Sz_Depth, CountNonZero,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_buf;
+        int gpu_dst = 0;
+        // d_buf is declared outside TEST_CYCLE(), so the same GpuMat is passed on every iteration.
+        TEST_CYCLE() gpu_dst = cv::gpu::countNonZero(d_src, d_buf);
+
+        SANITY_CHECK(gpu_dst);
+    }
+    else
+    {
+        int cpu_dst = 0;
+
+        TEST_CYCLE() cpu_dst = cv::countNonZero(src);
+
+        SANITY_CHECK(cpu_dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Reduce
+// Runs cv::gpu::reduce against cv::reduce for each size, depth, channel count, reduce op and dimension.
+CV_ENUM(ReduceCode, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN)
+
+enum {Rows = 0, Cols = 1}; // values forwarded as the `dim` argument of reduce() below
+CV_ENUM(ReduceDim, Rows, Cols)
+
+DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim);
+
+PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Reduce,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_16S, CV_32F),
+                    Values(1, 2, 3, 4),
+                    ReduceCode::all(),
+                    ReduceDim::all()))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int reduceOp = GET_PARAM(3);
+    const int dim = GET_PARAM(4);
+
+    const int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+        // The GPU and CPU calls receive the same (dim, reduceOp) pair.
+        TEST_CYCLE() cv::gpu::reduce(d_src, dst, dim, reduceOp);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::reduce(src, dst, dim, reduceOp);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Normalize
+// Runs the buffered cv::gpu::normalize overload against cv::normalize for each size, depth and norm type.
+DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType);
+
+PERF_TEST_P(Sz_Depth_NormType, Normalize,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    Values(NormType(cv::NORM_INF),
+                           NormType(cv::NORM_L1),
+                           NormType(cv::NORM_L2),
+                           NormType(cv::NORM_MINMAX))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1); // holds the MatDepth parameter; used for src and as the output type argument
+    const int norm_type = GET_PARAM(2);
+
+    const double alpha = 1;
+    const double beta = 0;
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+        cv::gpu::GpuMat d_norm_buf, d_cvt_buf;
+        // An empty GpuMat is passed as the mask; the two buffers are declared outside TEST_CYCLE().
+        TEST_CYCLE() cv::gpu::normalize(d_src, dst, alpha, beta, norm_type, type, cv::gpu::GpuMat(), d_norm_buf, d_cvt_buf);
+
+        GPU_SANITY_CHECK(dst, 1e-6);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::normalize(src, dst, alpha, beta, norm_type, type);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MeanStdDev
+// Runs cv::gpu::meanStdDev (with a reusable buffer) against cv::meanStdDev on a CV_8UC1 matrix.
+PERF_TEST_P(Sz, MeanStdDev,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src(size, CV_8UC1);
+    declare.in(src, WARMUP_RNG);
+
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_buf;
+        cv::Scalar gpu_mean;
+        cv::Scalar gpu_stddev;
+        // Both outputs are cv::Scalar values and are checked with the SANITY_CHECK defaults.
+        TEST_CYCLE() cv::gpu::meanStdDev(d_src, gpu_mean, gpu_stddev, d_buf);
+
+        SANITY_CHECK(gpu_mean);
+        SANITY_CHECK(gpu_stddev);
+    }
+    else
+    {
+        cv::Scalar cpu_mean;
+        cv::Scalar cpu_stddev;
+
+        TEST_CYCLE() cv::meanStdDev(src, cpu_mean, cpu_stddev);
+
+        SANITY_CHECK(cpu_mean);
+        SANITY_CHECK(cpu_stddev);
+    }
+}
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index 908d96341..c605b989e 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -48,25 +48,17 @@ using namespace cv::gpu;
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
 void cv::gpu::gemm(const GpuMat&, const GpuMat&, double, const GpuMat&, double, GpuMat&, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::flip(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::magnitude(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::magnitudeSqr(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::magnitudeSqr(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::phase(const GpuMat&, const GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
-void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
-void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
-void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&) { throw_no_cuda(); }
-void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); }
 void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::ConvolveBuf::create(Size, Size) { throw_no_cuda(); }
 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_no_cuda(); }
 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream&) { throw_no_cuda(); }
@@ -308,468 +300,6 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
 #endif
 }
 
-////////////////////////////////////////////////////////////////////////
-// transpose
-
-namespace arithm
-{
-    template <typename T> void transpose(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream);
-}
-
-void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
-{
-    CV_Assert( src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8 );
-
-    dst.create( src.cols, src.rows, src.type() );
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    if (src.elemSize() == 1)
-    {
-        NppStreamHandler h(stream);
-
-        NppiSize sz;
-        sz.width  = src.cols;
-        sz.height = src.rows;
-
-        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else if (src.elemSize() == 4)
-    {
-        arithm::transpose<int>(src, dst, stream);
-    }
-    else // if (src.elemSize() == 8)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-
-        arithm::transpose<double>(src, dst, stream);
-    }
-}
-
-////////////////////////////////////////////////////////////////////////
-// flip
-
-namespace
-{
-    template<int DEPTH> struct NppTypeTraits;
-    template<> struct NppTypeTraits<CV_8U>  { typedef Npp8u npp_t; };
-    template<> struct NppTypeTraits<CV_8S>  { typedef Npp8s npp_t; };
-    template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
-    template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; };
-    template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; };
-    template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; };
-    template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; };
-
-    template <int DEPTH> struct NppMirrorFunc
-    {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
-
-        typedef NppStatus (*func_t)(const npp_t* pSrc, int nSrcStep, npp_t* pDst, int nDstStep, NppiSize oROI, NppiAxis flip);
-    };
-
-    template <int DEPTH, typename NppMirrorFunc<DEPTH>::func_t func> struct NppMirror
-    {
-        typedef typename NppMirrorFunc<DEPTH>::npp_t npp_t;
-
-        static void call(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream)
-        {
-            NppStreamHandler h(stream);
-
-            NppiSize sz;
-            sz.width  = src.cols;
-            sz.height = src.rows;
-
-            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step),
-                dst.ptr<npp_t>(), static_cast<int>(dst.step), sz,
-                (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-}
-
-void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
-{
-    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);
-    static const func_t funcs[6][4] =
-    {
-        {NppMirror<CV_8U, nppiMirror_8u_C1R>::call, 0, NppMirror<CV_8U, nppiMirror_8u_C3R>::call, NppMirror<CV_8U, nppiMirror_8u_C4R>::call},
-        {0,0,0,0},
-        {NppMirror<CV_16U, nppiMirror_16u_C1R>::call, 0, NppMirror<CV_16U, nppiMirror_16u_C3R>::call, NppMirror<CV_16U, nppiMirror_16u_C4R>::call},
-        {0,0,0,0},
-        {NppMirror<CV_32S, nppiMirror_32s_C1R>::call, 0, NppMirror<CV_32S, nppiMirror_32s_C3R>::call, NppMirror<CV_32S, nppiMirror_32s_C4R>::call},
-        {NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
-    };
-
-    CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
-    CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
-
-    dst.create(src.size(), src.type());
-
-    funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// LUT
-
-void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
-{
-    const int cn = src.channels();
-
-    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
-    CV_Assert( lut.depth() == CV_8U );
-    CV_Assert( lut.channels() == 1 || lut.channels() == cn );
-    CV_Assert( lut.rows * lut.cols == 256 && lut.isContinuous() );
-
-    dst.create(src.size(), CV_MAKE_TYPE(lut.depth(), cn));
-
-    NppiSize sz;
-    sz.height = src.rows;
-    sz.width = src.cols;
-
-    Mat nppLut;
-    lut.convertTo(nppLut, CV_32S);
-
-    int nValues3[] = {256, 256, 256};
-
-    Npp32s pLevels[256];
-    for (int i = 0; i < 256; ++i)
-        pLevels[i] = i;
-
-    const Npp32s* pLevels3[3];
-
-#if (CUDA_VERSION <= 4020)
-    pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
-#else
-    GpuMat d_pLevels;
-    d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
-    pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
-#endif
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-    NppStreamHandler h(stream);
-
-    if (src.type() == CV_8UC1)
-    {
-#if (CUDA_VERSION <= 4020)
-        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), pLevels, 256) );
-#else
-        GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
-        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), d_pLevels.ptr<Npp32s>(), 256) );
-#endif
-    }
-    else
-    {
-        const Npp32s* pValues3[3];
-
-        Mat nppLut3[3];
-        if (nppLut.channels() == 1)
-        {
-#if (CUDA_VERSION <= 4020)
-            pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
-#else
-            GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
-            pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr<Npp32s>();
-#endif
-        }
-        else
-        {
-            cv::split(nppLut, nppLut3);
-
-#if (CUDA_VERSION <= 4020)
-            pValues3[0] = nppLut3[0].ptr<Npp32s>();
-            pValues3[1] = nppLut3[1].ptr<Npp32s>();
-            pValues3[2] = nppLut3[2].ptr<Npp32s>();
-#else
-            GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data));
-            GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data));
-            GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data));
-
-            pValues3[0] = d_nppLut0.ptr<Npp32s>();
-            pValues3[1] = d_nppLut1.ptr<Npp32s>();
-            pValues3[2] = d_nppLut2.ptr<Npp32s>();
-#endif
-        }
-
-        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, pLevels3, nValues3) );
-    }
-
-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
-}
-
-////////////////////////////////////////////////////////////////////////
-// NPP magnitide
-
-namespace
-{
-    typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
-
-    inline void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream)
-    {
-        CV_Assert(src.type() == CV_32FC2);
-
-        dst.create(src.size(), CV_32FC1);
-
-        NppiSize sz;
-        sz.width = src.cols;
-        sz.height = src.rows;
-
-        NppStreamHandler h(stream);
-
-        nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-}
-
-void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst, Stream& stream)
-{
-    npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
-{
-    npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// Polar <-> Cart
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace mathfunc
-    {
-        void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
-        void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
-    }
-}}}
-
-namespace
-{
-    inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
-    {
-        using namespace ::cv::gpu::cudev::mathfunc;
-
-        CV_Assert(x.size() == y.size() && x.type() == y.type());
-        CV_Assert(x.depth() == CV_32F);
-
-        if (mag)
-            mag->create(x.size(), x.type());
-        if (angle)
-            angle->create(x.size(), x.type());
-
-        GpuMat x1cn = x.reshape(1);
-        GpuMat y1cn = y.reshape(1);
-        GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat();
-        GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat();
-
-        cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
-    }
-
-    inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
-    {
-        using namespace ::cv::gpu::cudev::mathfunc;
-
-        CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
-        CV_Assert(mag.depth() == CV_32F);
-
-        x.create(mag.size(), mag.type());
-        y.create(mag.size(), mag.type());
-
-        GpuMat mag1cn = mag.reshape(1);
-        GpuMat angle1cn = angle.reshape(1);
-        GpuMat x1cn = x.reshape(1);
-        GpuMat y1cn = y.reshape(1);
-
-        polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
-    }
-}
-
-void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
-{
-    cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
-{
-    cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream)
-{
-    cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream)
-{
-    cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream)
-{
-    polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// normalize
-
-void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask)
-{
-    GpuMat norm_buf;
-    GpuMat cvt_buf;
-    normalize(src, dst, a, b, norm_type, dtype, mask, norm_buf, cvt_buf);
-}
-
-void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
-{
-    double scale = 1, shift = 0;
-    if (norm_type == NORM_MINMAX)
-    {
-        double smin = 0, smax = 0;
-        double dmin = std::min(a, b), dmax = std::max(a, b);
-        gpu::minMax(src, &smin, &smax, mask, norm_buf);
-        scale = (dmax - dmin) * (smax - smin > std::numeric_limits<double>::epsilon() ? 1.0 / (smax - smin) : 0.0);
-        shift = dmin - smin * scale;
-    }
-    else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF)
-    {
-        scale = gpu::norm(src, norm_type, mask, norm_buf);
-        scale = scale > std::numeric_limits<double>::epsilon() ? a / scale : 0.0;
-        shift = 0;
-    }
-    else
-    {
-        CV_Error(cv::Error::StsBadArg, "Unknown/unsupported norm type");
-    }
-
-    if (mask.empty())
-    {
-        src.convertTo(dst, dtype, scale, shift);
-    }
-    else
-    {
-        src.convertTo(cvt_buf, dtype, scale, shift);
-        cvt_buf.copyTo(dst, mask);
-    }
-}
-
-////////////////////////////////////////////////////////////////////////
-// copyMakeBorder
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
-    }
-}}}
-
-namespace
-{
-    template <typename T, int cn> void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
-    {
-        using namespace ::cv::gpu::cudev::imgproc;
-
-        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
-
-        copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);
-    }
-}
-
-#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__  > 4
-typedef Npp32s __attribute__((__may_alias__)) Npp32s_a;
-#else
-typedef Npp32s Npp32s_a;
-#endif
-
-void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)
-{
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-    CV_Assert(borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
-
-    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
-    {
-        NppiSize srcsz;
-        srcsz.width  = src.cols;
-        srcsz.height = src.rows;
-
-        NppiSize dstsz;
-        dstsz.width  = dst.cols;
-        dstsz.height = dst.rows;
-
-        NppStreamHandler h(stream);
-
-        switch (src.type())
-        {
-        case CV_8UC1:
-            {
-                Npp8u nVal = saturate_cast<Npp8u>(value[0]);
-                nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
-                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
-                break;
-            }
-        case CV_8UC4:
-            {
-                Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};
-                nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
-                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
-                break;
-            }
-        case CV_32SC1:
-            {
-                Npp32s nVal = saturate_cast<Npp32s>(value[0]);
-                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
-                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
-                break;
-            }
-        case CV_32FC1:
-            {
-                Npp32f val = saturate_cast<Npp32f>(value[0]);
-                Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
-                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
-                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
-                break;
-            }
-        }
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else
-    {
-        typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
-        static const caller_t callers[6][4] =
-        {
-            {   copyMakeBorder_caller<uchar, 1>  ,    copyMakeBorder_caller<uchar, 2>   ,    copyMakeBorder_caller<uchar, 3>  ,    copyMakeBorder_caller<uchar, 4>},
-            {0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},
-            {   copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/,    copyMakeBorder_caller<ushort, 3> ,    copyMakeBorder_caller<ushort, 4>},
-            {   copyMakeBorder_caller<short, 1>  , 0/*copyMakeBorder_caller<short, 2>*/ ,    copyMakeBorder_caller<short, 3>  ,    copyMakeBorder_caller<short, 4>},
-            {0/*copyMakeBorder_caller<int,   1>*/, 0/*copyMakeBorder_caller<int,   2>*/ , 0/*copyMakeBorder_caller<int,   3>*/, 0/*copyMakeBorder_caller<int  , 4>*/},
-            {   copyMakeBorder_caller<float, 1>  , 0/*copyMakeBorder_caller<float, 2>*/ ,    copyMakeBorder_caller<float, 3>  ,    copyMakeBorder_caller<float ,4>}
-        };
-
-        caller_t func = callers[src.depth()][src.channels() - 1];
-        CV_Assert(func != 0);
-
-        func(src, dst, top, left, borderType, value, stream);
-    }
-}
-
 ////////////////////////////////////////////////////////////////////////
 // integral
 
diff --git a/modules/gpuarithm/src/core.cpp b/modules/gpuarithm/src/core.cpp
new file mode 100644
index 000000000..bd0277cde
--- /dev/null
+++ b/modules/gpuarithm/src/core.cpp
@@ -0,0 +1,488 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+// Taken when HAVE_CUDA is undefined or CUDA_DISABLER is defined: every entry point below only calls throw_no_cuda().
+void cv::gpu::merge(const GpuMat* /*src*/, size_t /*count*/, GpuMat& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); }
+void cv::gpu::merge(const std::vector<GpuMat>& /*src*/, GpuMat& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); }
+
+void cv::gpu::split(const GpuMat& /*src*/, GpuMat* /*dst*/, Stream& /*stream*/) { throw_no_cuda(); }
+void cv::gpu::split(const GpuMat& /*src*/, std::vector<GpuMat>& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); }
+
+void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::flip(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_no_cuda(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+////////////////////////////////////////////////////////////////////////
+// merge/split
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace split_merge
+    {
+        void merge_caller(const PtrStepSzb* src, PtrStepSzb& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
+        void split_caller(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
+    }
+}}}
+
+namespace
+{
+    void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
+    {
+        using namespace ::cv::gpu::cudev::split_merge;
+        // Combines the n planes in src into one multi-channel dst (a plain copy when there is a single plane).
+        CV_Assert(src);
+        CV_Assert(n > 0);
+        // src[0] is the reference for the size and depth checks in the loop below
+        int depth = src[0].depth();
+        Size size = src[0].size();
+
+        if (depth == CV_64F)
+        {
+            if (!deviceSupports(NATIVE_DOUBLE))
+                CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
+        }
+
+        bool single_channel_only = true;
+        int total_channels = 0;
+
+        for (size_t i = 0; i < n; ++i)
+        {
+            CV_Assert(src[i].size() == size);
+            CV_Assert(src[i].depth() == depth);
+            single_channel_only = single_channel_only && src[i].channels() == 1;
+            total_channels += src[i].channels();
+        }
+        // the two asserts below imply total_channels == n <= 4, so the 4-slot array further down cannot overflow
+        CV_Assert(single_channel_only);
+        CV_Assert(total_channels <= 4);
+
+        if (total_channels == 1)
+            src[0].copyTo(dst);
+        else
+        {
+            dst.create(size, CV_MAKETYPE(depth, total_channels));
+
+            PtrStepSzb src_as_devmem[4];
+            for(size_t i = 0; i < n; ++i)
+                src_as_devmem[i] = src[i];
+
+            PtrStepSzb dst_as_devmem(dst);
+            merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream);
+        }
+    }
+
+    void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
+    {
+        using namespace ::cv::gpu::cudev::split_merge;
+        // Splits src into src.channels() single-channel planes written to dst[0 .. channels-1].
+        CV_Assert(dst);
+
+        int depth = src.depth();
+        int num_channels = src.channels();
+
+        if (depth == CV_64F)
+        {
+            if (!deviceSupports(NATIVE_DOUBLE))
+                CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
+        }
+
+        if (num_channels == 1)
+        {
+            src.copyTo(dst[0]);
+            return;
+        }
+        // reject unsupported channel counts before any dst[i] is touched; only 4 device views exist below
+        CV_Assert(num_channels <= 4);
+
+        for (int i = 0; i < num_channels; ++i)
+            dst[i].create(src.size(), depth);
+
+        PtrStepSzb dst_as_devmem[4];
+        for (int i = 0; i < num_channels; ++i)
+            dst_as_devmem[i] = dst[i];
+
+        PtrStepSzb src_as_devmem(src);
+        split_caller(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), stream);
+    }
+}
+
+void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream)
+{
+    ::merge(src, n, dst, StreamAccessor::getStream(stream));
+}
+
+
+void cv::gpu::merge(const std::vector<GpuMat>& src, GpuMat& dst, Stream& stream) // vector overload of merge
+{
+    ::merge(src.empty() ? 0 : &src[0], src.size(), dst, StreamAccessor::getStream(stream)); // &src[0] is undefined on an empty vector; pass null so the helper's CV_Assert(src) reports it
+}
+
+void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream)
+{
+    ::split(src, dst, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::split(const GpuMat& src, std::vector<GpuMat>& dst, Stream& stream)
+{
+    dst.resize(src.channels());
+    if(src.channels() > 0)
+        ::split(src, &dst[0], StreamAccessor::getStream(stream));
+}
+
+////////////////////////////////////////////////////////////////////////
+// transpose
+
+namespace arithm
+{
+    template <typename T> void transpose(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream);
+}
+
+void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
+{
+    CV_Assert( src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8 );
+    // dst gets the swapped dimensions (src.cols rows, src.rows columns) and the same type as src
+    dst.create( src.cols, src.rows, src.type() );
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+    // dispatch on element size in bytes: 1 -> nppiTranspose_8u_C1R, 4 -> arithm::transpose<int>, 8 -> arithm::transpose<double>
+    if (src.elemSize() == 1)
+    {
+        NppStreamHandler h(stream);
+
+        NppiSize sz;
+        sz.width  = src.cols;
+        sz.height = src.rows;
+
+        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+    else if (src.elemSize() == 4)
+    {
+        arithm::transpose<int>(src, dst, stream);
+    }
+    else // if (src.elemSize() == 8)
+    {
+        if (!deviceSupports(NATIVE_DOUBLE))
+            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
+
+        arithm::transpose<double>(src, dst, stream);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////
+// flip
+
+namespace
+{
+    template<int DEPTH> struct NppTypeTraits;
+    template<> struct NppTypeTraits<CV_8U>  { typedef Npp8u npp_t; };
+    template<> struct NppTypeTraits<CV_8S>  { typedef Npp8s npp_t; };
+    template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
+    template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; };
+    template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; };
+    template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; };
+    template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; };
+
+    template <int DEPTH> struct NppMirrorFunc
+    {
+        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+
+        typedef NppStatus (*func_t)(const npp_t* pSrc, int nSrcStep, npp_t* pDst, int nDstStep, NppiSize oROI, NppiAxis flip);
+    };
+
+    template <int DEPTH, typename NppMirrorFunc<DEPTH>::func_t func> struct NppMirror
+    {
+        typedef typename NppMirrorFunc<DEPTH>::npp_t npp_t;
+
+        static void call(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream)
+        {
+            NppStreamHandler h(stream);
+
+            NppiSize sz;
+            sz.width  = src.cols;
+            sz.height = src.rows;
+
+            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step),
+                dst.ptr<npp_t>(), static_cast<int>(dst.step), sz,
+                (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+}
+
+void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);
+    static const func_t funcs[6][4] =
+    {
+        {NppMirror<CV_8U, nppiMirror_8u_C1R>::call, 0, NppMirror<CV_8U, nppiMirror_8u_C3R>::call, NppMirror<CV_8U, nppiMirror_8u_C4R>::call},
+        {0,0,0,0},
+        {NppMirror<CV_16U, nppiMirror_16u_C1R>::call, 0, NppMirror<CV_16U, nppiMirror_16u_C3R>::call, NppMirror<CV_16U, nppiMirror_16u_C4R>::call},
+        {0,0,0,0},
+        {NppMirror<CV_32S, nppiMirror_32s_C1R>::call, 0, NppMirror<CV_32S, nppiMirror_32s_C3R>::call, NppMirror<CV_32S, nppiMirror_32s_C4R>::call},
+        {NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
+    };
+    // the table is indexed [depth][channels - 1]; the asserts below keep the lookup away from its zero entries
+    CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
+    CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
+
+    dst.create(src.size(), src.type());
+
+    funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
+}
+
+////////////////////////////////////////////////////////////////////////
+// LUT
+
+void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
+{
+    const int cn = src.channels();
+    // src must be CV_8UC1 or CV_8UC3; lut must be continuous CV_8U with 256 entries and 1 or cn channels
+    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
+    CV_Assert( lut.depth() == CV_8U );
+    CV_Assert( lut.channels() == 1 || lut.channels() == cn );
+    CV_Assert( lut.rows * lut.cols == 256 && lut.isContinuous() );
+
+    dst.create(src.size(), CV_MAKE_TYPE(lut.depth(), cn));
+
+    NppiSize sz;
+    sz.height = src.rows;
+    sz.width = src.cols;
+    // the NPP calls below take Npp32s tables, so the 8-bit LUT is converted to CV_32S first
+    Mat nppLut;
+    lut.convertTo(nppLut, CV_32S);
+
+    int nValues3[] = {256, 256, 256};
+    // identity levels 0..255
+    Npp32s pLevels[256];
+    for (int i = 0; i < 256; ++i)
+        pLevels[i] = i;
+
+    const Npp32s* pLevels3[3];
+    // for CUDA_VERSION > 4020 the tables are uploaded and device pointers are passed instead of host pointers
+#if (CUDA_VERSION <= 4020)
+    pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
+#else
+    GpuMat d_pLevels;
+    d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
+    pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
+#endif
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+    NppStreamHandler h(stream);
+
+    if (src.type() == CV_8UC1)
+    {
+#if (CUDA_VERSION <= 4020)
+        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), pLevels, 256) );
+#else
+        GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
+        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), d_pLevels.ptr<Npp32s>(), 256) );
+#endif
+    }
+    else
+    {
+        const Npp32s* pValues3[3];
+        GpuMat d_nppLut3[3]; // device LUT copies live in this scope so pValues3 stays valid until the NPP call below
+        Mat nppLut3[3];
+        if (nppLut.channels() == 1)
+        {
+#if (CUDA_VERSION <= 4020)
+            pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
+#else
+            d_nppLut3[0].upload(Mat(1, 256, CV_32S, nppLut.data));
+            pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut3[0].ptr<Npp32s>();
+#endif
+        }
+        else
+        {
+            cv::split(nppLut, nppLut3);
+
+#if (CUDA_VERSION <= 4020)
+            pValues3[0] = nppLut3[0].ptr<Npp32s>();
+            pValues3[1] = nppLut3[1].ptr<Npp32s>();
+            pValues3[2] = nppLut3[2].ptr<Npp32s>();
+#else
+            d_nppLut3[0].upload(Mat(1, 256, CV_32S, nppLut3[0].data));
+            d_nppLut3[1].upload(Mat(1, 256, CV_32S, nppLut3[1].data));
+            d_nppLut3[2].upload(Mat(1, 256, CV_32S, nppLut3[2].data));
+
+            pValues3[0] = d_nppLut3[0].ptr<Npp32s>();
+            pValues3[1] = d_nppLut3[1].ptr<Npp32s>();
+            pValues3[2] = d_nppLut3[2].ptr<Npp32s>();
+#endif
+        }
+
+        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, pLevels3, nValues3) );
+    }
+
+    if (stream == 0)
+        cudaSafeCall( cudaDeviceSynchronize() );
+}
+
+////////////////////////////////////////////////////////////////////////
+// copyMakeBorder
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
+    }
+}}}
+
+namespace
+{
+    template <typename T, int cn> void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
+    {
+        using namespace ::cv::gpu::cudev::imgproc;
+        // convert all four border components to T with saturate_cast before forwarding them as a T array
+        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
+
+        copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);
+    }
+}
+
+#if defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4)) // GCC 4.5 or newer; testing the minor version alone rejected e.g. GCC 5.0
+typedef Npp32s __attribute__((__may_alias__)) Npp32s_a; // aliasing-exempt Npp32s, used to read a float's bits through a cast
+#else
+typedef Npp32s Npp32s_a;
+#endif
+
+void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)
+{
+    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
+    CV_Assert(borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
+
+    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
+    {
+        NppiSize srcsz;
+        srcsz.width  = src.cols;
+        srcsz.height = src.rows;
+
+        NppiSize dstsz;
+        dstsz.width  = dst.cols;
+        dstsz.height = dst.rows;
+
+        NppStreamHandler h(stream);
+
+        switch (src.type())
+        {
+        case CV_8UC1:
+            {
+                Npp8u nVal = saturate_cast<Npp8u>(value[0]);
+                nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
+                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
+                break;
+            }
+        case CV_8UC4:
+            {
+                Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};
+                nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
+                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
+                break;
+            }
+        case CV_32SC1:
+            {
+                Npp32s nVal = saturate_cast<Npp32s>(value[0]);
+                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
+                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
+                break;
+            }
+        case CV_32FC1:
+            {
+                Npp32f val = saturate_cast<Npp32f>(value[0]);
+                Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
+                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
+                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
+                break;
+            }
+        }
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+    else
+    {
+        typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
+        static const caller_t callers[6][4] =
+        {
+            {   copyMakeBorder_caller<uchar, 1>  ,    copyMakeBorder_caller<uchar, 2>   ,    copyMakeBorder_caller<uchar, 3>  ,    copyMakeBorder_caller<uchar, 4>},
+            {0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},
+            {   copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/,    copyMakeBorder_caller<ushort, 3> ,    copyMakeBorder_caller<ushort, 4>},
+            {   copyMakeBorder_caller<short, 1>  , 0/*copyMakeBorder_caller<short, 2>*/ ,    copyMakeBorder_caller<short, 3>  ,    copyMakeBorder_caller<short, 4>},
+            {0/*copyMakeBorder_caller<int,   1>*/, 0/*copyMakeBorder_caller<int,   2>*/ , 0/*copyMakeBorder_caller<int,   3>*/, 0/*copyMakeBorder_caller<int  , 4>*/},
+            {   copyMakeBorder_caller<float, 1>  , 0/*copyMakeBorder_caller<float, 2>*/ ,    copyMakeBorder_caller<float, 3>  ,    copyMakeBorder_caller<float ,4>}
+        };
+
+        caller_t func = callers[src.depth()][src.channels() - 1];
+        CV_Assert(func != 0);
+
+        func(src, dst, top, left, borderType, value, stream);
+    }
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp
index f76656019..e81833106 100644
--- a/modules/gpuarithm/src/element_operations.cpp
+++ b/modules/gpuarithm/src/element_operations.cpp
@@ -49,39 +49,72 @@ using namespace cv::gpu;
 
 void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::divide(double, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::abs(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::sqr(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::sqrt(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::exp(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::log(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::compare(const GpuMat&, Scalar, GpuMat&, int, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::bitwise_or(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::rshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::lshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::addWeighted(const GpuMat&, double, const GpuMat&, double, double, GpuMat&, int, Stream&) { throw_no_cuda(); }
+
 double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_no_cuda(); return 0.0;}
 
+void cv::gpu::magnitude(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::magnitudeSqr(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::gpu::magnitudeSqr(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::phase(const GpuMat&, const GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
+
 #else
 
 ////////////////////////////////////////////////////////////////////////
@@ -3283,4 +3316,118 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
     return thresh;
 }
 
+////////////////////////////////////////////////////////////////////////
+// NPP magnitude
+
+namespace
+{
+    typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
+
+    inline void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream)
+    {
+        CV_Assert(src.type() == CV_32FC2);
+        // input is two-channel 32-bit float; dst is allocated as one-channel 32-bit float of the same size
+        dst.create(src.size(), CV_32FC1);
+
+        NppiSize sz;
+        sz.width = src.cols;
+        sz.height = src.rows;
+
+        NppStreamHandler h(stream);
+        // func is the NPP routine chosen by the caller (the public wrappers pass the magnitude or squared-magnitude variant)
+        nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+}
+
+void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst, Stream& stream)
+{
+    npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
+{
+    npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
+}
+
+////////////////////////////////////////////////////////////////////////
+// Polar <-> Cart
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace mathfunc
+    {
+        void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
+        void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
+    }
+}}}
+
+namespace
+{
+    inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
+    {
+        using namespace ::cv::gpu::cudev::mathfunc;
+
+        CV_Assert(x.size() == y.size() && x.type() == y.type());
+        CV_Assert(x.depth() == CV_32F);
+        // mag and angle are optional outputs: a null pointer skips the allocation for that output
+        if (mag)
+            mag->create(x.size(), x.type());
+        if (angle)
+            angle->create(x.size(), x.type());
+        // reshape(1) views are handed to the kernel wrapper; an omitted output is passed as an empty GpuMat
+        GpuMat x1cn = x.reshape(1);
+        GpuMat y1cn = y.reshape(1);
+        GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat();
+        GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat();
+
+        cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
+    }
+
+    inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
+    {
+        using namespace ::cv::gpu::cudev::mathfunc;
+
+        CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
+        CV_Assert(mag.depth() == CV_32F);
+        // size the outputs from angle: the assert above allows an empty mag, and is otherwise identical to using mag
+        x.create(angle.size(), angle.type());
+        y.create(angle.size(), angle.type());
+
+        GpuMat mag1cn = mag.reshape(1);
+        GpuMat angle1cn = angle.reshape(1);
+        GpuMat x1cn = x.reshape(1);
+        GpuMat y1cn = y.reshape(1);
+
+        polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
+    }
+}
+
+void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
+{
+    cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
+{
+    cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream)
+{
+    cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream)
+{
+    cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream)
+{
+    polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
+}
+
 #endif
diff --git a/modules/gpuarithm/src/precomp.hpp b/modules/gpuarithm/src/precomp.hpp
index ce497eeba..5dbef9981 100644
--- a/modules/gpuarithm/src/precomp.hpp
+++ b/modules/gpuarithm/src/precomp.hpp
@@ -49,7 +49,6 @@
 
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/core/utility.hpp"
-#include "opencv2/core/core_c.h"
 
 #include "opencv2/core/gpu_private.hpp"
 
diff --git a/modules/gpuarithm/src/matrix_reductions.cpp b/modules/gpuarithm/src/reductions.cpp
similarity index 94%
rename from modules/gpuarithm/src/matrix_reductions.cpp
rename to modules/gpuarithm/src/reductions.cpp
index 6ffde1722..fc397f9ce 100644
--- a/modules/gpuarithm/src/matrix_reductions.cpp
+++ b/modules/gpuarithm/src/reductions.cpp
@@ -47,30 +47,42 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_no_cuda(); }
-void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); }
 double cv::gpu::norm(const GpuMat&, int) { throw_no_cuda(); return 0.0; }
 double cv::gpu::norm(const GpuMat&, int, GpuMat&) { throw_no_cuda(); return 0.0; }
 double cv::gpu::norm(const GpuMat&, int, const GpuMat&, GpuMat&) { throw_no_cuda(); return 0.0; }
 double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_no_cuda(); return 0.0; }
+
 Scalar cv::gpu::sum(const GpuMat&) { throw_no_cuda(); return Scalar(); }
 Scalar cv::gpu::sum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
 Scalar cv::gpu::sum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
+
 Scalar cv::gpu::absSum(const GpuMat&) { throw_no_cuda(); return Scalar(); }
 Scalar cv::gpu::absSum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
 Scalar cv::gpu::absSum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
+
 Scalar cv::gpu::sqrSum(const GpuMat&) { throw_no_cuda(); return Scalar(); }
 Scalar cv::gpu::sqrSum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
 Scalar cv::gpu::sqrSum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); }
+
 void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&) { throw_no_cuda(); }
 void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&, GpuMat&) { throw_no_cuda(); }
+
 void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&) { throw_no_cuda(); }
 void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
+
 int cv::gpu::countNonZero(const GpuMat&) { throw_no_cuda(); return 0; }
 int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_no_cuda(); return 0; }
+
 void cv::gpu::reduce(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_no_cuda(); } // stub for builds without CUDA
+void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); } // stub for builds without CUDA
+
 void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_no_cuda(); }
 
+void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&) { throw_no_cuda(); } // stub for builds without CUDA
+void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } // stub for builds without CUDA
+
 #else
 
 namespace
@@ -109,46 +121,6 @@ namespace
     };
 }
 
-
-////////////////////////////////////////////////////////////////////////
-// meanStdDev
-
-void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
-{
-    GpuMat buf;
-    meanStdDev(src, mean, stddev, buf);
-}
-
-void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat& buf)
-{
-    CV_Assert(src.type() == CV_8UC1);
-
-    if (!deviceSupports(FEATURE_SET_COMPUTE_13))
-        CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
-
-    NppiSize sz;
-    sz.width  = src.cols;
-    sz.height = src.rows;
-
-    DeviceBuffer dbuf(2);
-
-    int bufSize;
-#if (CUDA_VERSION <= 4020)
-    nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
-#else
-    nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
-#endif
-
-    ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
-
-    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
-
-    cudaSafeCall( cudaDeviceSynchronize() );
-
-    double* ptrs[2] = {mean.val, stddev.val};
-    dbuf.download(ptrs);
-}
-
 ////////////////////////////////////////////////////////////////////////
 // norm
 
@@ -697,6 +669,45 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+// meanStdDev
+
+void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
+{ // Convenience overload: creates a local scratch GpuMat and delegates to the four-argument overload.
+    GpuMat buf;
+    meanStdDev(src, mean, stddev, buf);
+}
+
+void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat& buf)
+{ // Mean and standard deviation of a CV_8UC1 image via NPP; buf is caller-provided scratch storage.
+    CV_Assert(src.type() == CV_8UC1);
+    // The device must report FEATURE_SET_COMPUTE_13, otherwise StsNotImplemented is raised.
+    if (!deviceSupports(FEATURE_SET_COMPUTE_13))
+        CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
+    // Image extent in the form NPP expects.
+    NppiSize sz;
+    sz.width  = src.cols;
+    sz.height = src.rows;
+    // Two-element device buffer: slot 0 and slot 1 are handed to NPP as its two result pointers below.
+    DeviceBuffer dbuf(2);
+    // Ask NPP how much scratch memory it needs; the query function is selected by CUDA_VERSION.
+    int bufSize;
+#if (CUDA_VERSION <= 4020)
+    nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
+#else
+    nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
+#endif
+    // Make sure buf can hold 1 x bufSize elements of CV_8UC1.
+    ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
+
+    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
+    // Block until the device has finished before the results are read back.
+    cudaSafeCall( cudaDeviceSynchronize() );
+    // download() is given mean.val and stddev.val as the destinations for the two buffer slots.
+    double* ptrs[2] = {mean.val, stddev.val};
+    dbuf.download(ptrs);
+}
+
 //////////////////////////////////////////////////////////////////////////////
 // rectStdDev
 
@@ -727,4 +738,47 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
         cudaSafeCall( cudaDeviceSynchronize() );
 }
 
+////////////////////////////////////////////////////////////////////////
+// normalize
+
+void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask)
+{ // Convenience overload: creates the two scratch buffers locally and delegates to the nine-argument overload.
+    GpuMat norm_buf;
+    GpuMat cvt_buf;
+    normalize(src, dst, a, b, norm_type, dtype, mask, norm_buf, cvt_buf);
+}
+
+void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
+{ // Computes a linear transform dst = src * scale + shift from norm_type, then applies it with convertTo.
+    double scale = 1, shift = 0;
+    if (norm_type == NORM_MINMAX)
+    { // Min-max mode: map the source range [smin, smax] onto [min(a, b), max(a, b)].
+        double smin = 0, smax = 0;
+        double dmin = std::min(a, b), dmax = std::max(a, b);
+        gpu::minMax(src, &smin, &smax, mask, norm_buf);
+        scale = (dmax - dmin) * (smax - smin > std::numeric_limits<double>::epsilon() ? 1.0 / (smax - smin) : 0.0);
+        shift = dmin - smin * scale; // with scale == 0 (source range <= epsilon) this leaves shift == dmin
+    }
+    else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF)
+    { // Norm mode: scale by a / norm(src); a norm <= epsilon gives scale 0. Parameter b is not used here.
+        scale = gpu::norm(src, norm_type, mask, norm_buf);
+        scale = scale > std::numeric_limits<double>::epsilon() ? a / scale : 0.0;
+        shift = 0;
+    }
+    else
+    {
+        CV_Error(cv::Error::StsBadArg, "Unknown/unsupported norm type");
+    }
+    // Without a mask convert straight into dst; with a mask convert into cvt_buf and copy through the mask.
+    if (mask.empty())
+    {
+        src.convertTo(dst, dtype, scale, shift);
+    }
+    else
+    {
+        src.convertTo(cvt_buf, dtype, scale, shift);
+        cvt_buf.copyTo(dst, mask);
+    }
+}
+
 #endif
diff --git a/modules/gpuarithm/src/split_merge.cpp b/modules/gpuarithm/src/split_merge.cpp
deleted file mode 100644
index c9ab7ed30..000000000
--- a/modules/gpuarithm/src/split_merge.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-using namespace cv;
-using namespace cv::gpu;
-
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
-
-void cv::gpu::merge(const GpuMat* /*src*/, size_t /*count*/, GpuMat& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); }
-void cv::gpu::merge(const std::vector<GpuMat>& /*src*/, GpuMat& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); }
-void cv::gpu::split(const GpuMat& /*src*/, GpuMat* /*dst*/, Stream& /*stream*/) { throw_no_cuda(); }
-void cv::gpu::split(const GpuMat& /*src*/, std::vector<GpuMat>& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); }
-
-#else /* !defined (HAVE_CUDA) */
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace split_merge
-    {
-        void merge_caller(const PtrStepSzb* src, PtrStepSzb& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
-        void split_caller(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
-    }
-}}}
-
-namespace
-{
-    void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
-    {
-        using namespace ::cv::gpu::cudev::split_merge;
-
-        CV_Assert(src);
-        CV_Assert(n > 0);
-
-        int depth = src[0].depth();
-        Size size = src[0].size();
-
-        if (depth == CV_64F)
-        {
-            if (!deviceSupports(NATIVE_DOUBLE))
-                CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-        }
-
-        bool single_channel_only = true;
-        int total_channels = 0;
-
-        for (size_t i = 0; i < n; ++i)
-        {
-            CV_Assert(src[i].size() == size);
-            CV_Assert(src[i].depth() == depth);
-            single_channel_only = single_channel_only && src[i].channels() == 1;
-            total_channels += src[i].channels();
-        }
-
-        CV_Assert(single_channel_only);
-        CV_Assert(total_channels <= 4);
-
-        if (total_channels == 1)
-            src[0].copyTo(dst);
-        else
-        {
-            dst.create(size, CV_MAKETYPE(depth, total_channels));
-
-            PtrStepSzb src_as_devmem[4];
-            for(size_t i = 0; i < n; ++i)
-                src_as_devmem[i] = src[i];
-
-            PtrStepSzb dst_as_devmem(dst);
-            merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream);
-        }
-    }
-
-    void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
-    {
-        using namespace ::cv::gpu::cudev::split_merge;
-
-        CV_Assert(dst);
-
-        int depth = src.depth();
-        int num_channels = src.channels();
-
-        if (depth == CV_64F)
-        {
-            if (!deviceSupports(NATIVE_DOUBLE))
-                CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-        }
-
-        if (num_channels == 1)
-        {
-            src.copyTo(dst[0]);
-            return;
-        }
-
-        for (int i = 0; i < num_channels; ++i)
-            dst[i].create(src.size(), depth);
-
-        CV_Assert(num_channels <= 4);
-
-        PtrStepSzb dst_as_devmem[4];
-        for (int i = 0; i < num_channels; ++i)
-            dst_as_devmem[i] = dst[i];
-
-        PtrStepSzb src_as_devmem(src);
-        split_caller(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), stream);
-    }
-}
-
-void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream)
-{
-    ::merge(src, n, dst, StreamAccessor::getStream(stream));
-}
-
-
-void cv::gpu::merge(const std::vector<GpuMat>& src, GpuMat& dst, Stream& stream)
-{
-    ::merge(&src[0], src.size(), dst, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream)
-{
-    ::split(src, dst, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::split(const GpuMat& src, std::vector<GpuMat>& dst, Stream& stream)
-{
-    dst.resize(src.channels());
-    if(src.channels() > 0)
-        ::split(src, &dst[0], StreamAccessor::getStream(stream));
-}
-
-#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuarithm/test/test_arithm.cpp b/modules/gpuarithm/test/test_arithm.cpp
new file mode 100644
index 000000000..93fb0ae84
--- /dev/null
+++ b/modules/gpuarithm/test/test_arithm.cpp
@@ -0,0 +1,439 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+//////////////////////////////////////////////////////////////////////////////
+// GEMM
+
+#ifdef HAVE_CUBLAS
+
+CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T); // NOTE(review): ALL_GEMM_FLAGS below lists no GEMM_2_T | GEMM_3_T combination -- confirm that is deliberate
+#define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
+
+PARAM_TEST_CASE(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    int flags;
+    bool useRoi;
+    // Copies the five test parameters into the members above and selects the device under test.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        flags = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(GEMM, Accuracy)
+{ // Compares cv::gpu::gemm with cv::gemm; for unsupported inputs only the error code of a thrown exception is checked.
+    cv::Mat src1 = randomMat(size, type, -10.0, 10.0);
+    cv::Mat src2 = randomMat(size, type, -10.0, 10.0);
+    cv::Mat src3 = randomMat(size, type, -10.0, 10.0);
+    double alpha = randomDouble(-10.0, 10.0);
+    double beta = randomDouble(-10.0, 10.0);
+    // 64-bit depth without NATIVE_DOUBLE: an exception must carry StsUnsupportedFormat (nothing fails if none is thrown).
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else if (type == CV_64FC2 && flags != 0) // an exception here must carry StsNotImplemented
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+        cv::gpu::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
+        // Reference result from the CPU implementation.
+        cv::Mat dst_gold;
+        cv::gemm(src1, src2, alpha, src3, beta, dst_gold, flags);
+        // Tolerance is 1e-1 for 32-bit float depth and 1e-10 otherwise.
+        EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1e-10);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, GEMM, testing::Combine( // one test per combination of the value sets below
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatType(CV_32FC1), MatType(CV_32FC2), MatType(CV_64FC1), MatType(CV_64FC2)),
+    ALL_GEMM_FLAGS,
+    WHOLE_SUBMAT));
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// Integral
+
+PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool useRoi;
+    // Copies the three test parameters into the members above and selects the device under test.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Integral, Accuracy)
+{
+    cv::Mat src = randomMat(size, CV_8UC1);
+    // The destination is one row and one column larger than the source and holds 32-bit integers.
+    cv::gpu::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);
+    cv::gpu::integral(loadMat(src, useRoi), dst);
+    // CPU reference computed with the same CV_32S sum depth.
+    cv::Mat dst_gold;
+    cv::integral(src, dst_gold, CV_32S);
+    // Compared with a tolerance of 0.0, i.e. the results must be identical.
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Integral, testing::Combine( // one test per combination of the value sets below
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////
+// MulSpectrums
+
+CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT) // parameter type of the MulSpectrums fixture below
+
+PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int flag;
+    // Two random CV_32FC2 operands shared by the Simple and Scaled tests.
+    cv::Mat a, b;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        flag = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        a = randomMat(size, CV_32FC2);
+        b = randomMat(size, CV_32FC2);
+    }
+};
+
+GPU_TEST_P(MulSpectrums, Simple)
+{ // cv::gpu::mulSpectrums must agree with cv::mulSpectrums within 1e-2; the last argument is false in both calls.
+    cv::gpu::GpuMat c;
+    cv::gpu::mulSpectrums(loadMat(a), loadMat(b), c, flag, false);
+
+    cv::Mat c_gold;
+    cv::mulSpectrums(a, b, c_gold, flag, false);
+
+    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
+}
+
+GPU_TEST_P(MulSpectrums, Scaled)
+{ // Checks mulAndScaleSpectrums against cv::mulSpectrums followed by an explicit scaling step.
+    float scale = 1.f / size.area();
+
+    cv::gpu::GpuMat c;
+    cv::gpu::mulAndScaleSpectrums(loadMat(a), loadMat(b), c, flag, scale, false);
+    // The CPU reference multiplies first and then applies the same scale through convertTo.
+    cv::Mat c_gold;
+    cv::mulSpectrums(a, b, c_gold, flag, false);
+    c_gold.convertTo(c_gold, c_gold.type(), scale);
+
+    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, MulSpectrums, testing::Combine( // only flag values 0 and DFT_ROWS are exercised
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
+
+////////////////////////////////////////////////////////////////////////////
+// Dft
+
+struct Dft : testing::TestWithParam<cv::gpu::DeviceInfo>
+{
+    cv::gpu::DeviceInfo devInfo;
+    // The only test parameter is the device, which is made current before each test.
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+namespace
+{
+    void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
+    { // Complex-to-complex check: cv::gpu::dft on a random CV_32FC2 matrix must match cv::dft; hint labels failures.
+        SCOPED_TRACE(hint);
+
+        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
+
+        cv::Mat b_gold;
+        cv::dft(a, b_gold, flags);
+        // With inplace set, the output header wraps a pre-allocated single-row buffer of rows * cols elements.
+        cv::gpu::GpuMat d_b;
+        cv::gpu::GpuMat d_b_data;
+        if (inplace)
+        {
+            d_b_data.create(1, a.size().area(), CV_32FC2);
+            d_b = cv::gpu::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
+        }
+        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
+        // In the inplace case the result must still live in the pre-allocated buffer; tolerance grows with rows * cols.
+        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
+        ASSERT_EQ(CV_32F, d_b.depth());
+        ASSERT_EQ(2, d_b.channels());
+        EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
+    }
+}
+
+GPU_TEST_P(Dft, C2C)
+{
+    int cols = randomInt(2, 100);
+    int rows = randomInt(2, 100);
+    // Every case runs twice: first with a fresh output matrix, then with a pre-allocated (in-place) one.
+    for (int i = 0; i < 2; ++i)
+    {
+        bool inplace = i != 0;
+        // A label suffix "a b" means the size is (cols + a, rows + b), as in the R2CThenC2R test.
+        testC2C("no flags", cols, rows, 0, inplace);
+        testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
+        testC2C("no flags 1 0", cols + 1, rows, 0, inplace);
+        testC2C("no flags 1 1", cols + 1, rows + 1, 0, inplace);
+        testC2C("DFT_INVERSE", cols, rows, cv::DFT_INVERSE, inplace);
+        testC2C("DFT_ROWS", cols, rows, cv::DFT_ROWS, inplace);
+        testC2C("single col", 1, rows, 0, inplace);
+        testC2C("single row", cols, 1, 0, inplace);
+        testC2C("single col inversed", 1, rows, cv::DFT_INVERSE, inplace);
+        testC2C("single row inversed", cols, 1, cv::DFT_INVERSE, inplace);
+        testC2C("single row DFT_ROWS", cols, 1, cv::DFT_ROWS, inplace);
+        testC2C("size 1 2", 1, 2, 0, inplace);
+        testC2C("size 2 1", 2, 1, 0, inplace);
+    }
+}
+
+namespace
+{
+    void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
+    { // Round trip: a forward DFT of a real matrix followed by a scaled real-output DFT must reproduce the input.
+        SCOPED_TRACE(hint);
+
+        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC1, 0.0, 10.0);
+        // With inplace set, both outputs are headers over pre-allocated single-row buffers.
+        cv::gpu::GpuMat d_b, d_c;
+        cv::gpu::GpuMat d_b_data, d_c_data;
+        if (inplace)
+        {
+            if (a.cols == 1)
+            {
+                d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
+                d_b = cv::gpu::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
+            }
+            else
+            {
+                d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
+                d_b = cv::gpu::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
+            }
+            d_c_data.create(1, a.size().area(), CV_32F);
+            d_c = cv::gpu::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
+        }
+        // Forward transform with no flags, then back with DFT_REAL_OUTPUT | DFT_SCALE.
+        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
+        cv::gpu::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
+        // In the inplace case both results must still live in the pre-allocated buffers.
+        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
+        EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());
+        ASSERT_EQ(CV_32F, d_c.depth());
+        ASSERT_EQ(1, d_c.channels());
+
+        cv::Mat c(d_c);
+        EXPECT_MAT_NEAR(a, c, rows * cols * 1e-5);
+    }
+}
+
+GPU_TEST_P(Dft, R2CThenC2R)
+{
+    int cols = randomInt(2, 100);
+    int rows = randomInt(2, 100);
+    // First pass: outputs are allocated by cv::gpu::dft itself.
+    testR2CThenC2R("sanity", cols, rows, false);
+    testR2CThenC2R("sanity 0 1", cols, rows + 1, false);
+    testR2CThenC2R("sanity 1 0", cols + 1, rows, false);
+    testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, false);
+    testR2CThenC2R("single col", 1, rows, false);
+    testR2CThenC2R("single col 1", 1, rows + 1, false);
+    testR2CThenC2R("single row", cols, 1, false);
+    testR2CThenC2R("single row 1", cols + 1, 1, false);
+    // Second pass: pre-allocated outputs; the two "single col" cases are not repeated here.
+    testR2CThenC2R("sanity", cols, rows, true);
+    testR2CThenC2R("sanity 0 1", cols, rows + 1, true);
+    testR2CThenC2R("sanity 1 0", cols + 1, rows, true);
+    testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, true);
+    testR2CThenC2R("single row", cols, 1, true);
+    testR2CThenC2R("single row 1", cols + 1, 1, true);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Dft, ALL_DEVICES); // the Dft tests take the device as their only parameter
+
+////////////////////////////////////////////////////////
+// Convolve
+
+namespace
+{
+    void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
+    { // CPU reference for the Convolve test, built on cv::dft; ccorr is forwarded to cv::mulSpectrums as its last argument.
+        // reallocate the output array if needed
+        C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
+        cv::Size dftSize;
+
+        // compute the size of DFT transform
+        dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
+        dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);
+
+        // allocate temporary buffers and initialize them with 0s
+        cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));
+        cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));
+
+        // copy A and B to the top-left corners of tempA and tempB, respectively
+        cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));
+        A.copyTo(roiA);
+        cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));
+        B.copyTo(roiB);
+
+        // now transform the padded A & B in-place;
+        // use "nonzeroRows" hint for faster processing
+        cv::dft(tempA, tempA, 0, A.rows);
+        cv::dft(tempB, tempB, 0, B.rows);
+
+        // multiply the spectrums;
+        // the function handles packed spectrum representations well
+        cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);
+
+        // transform the product back from the frequency domain.
+        // Even though all the result rows will be non-zero,
+        // you need only the first C.rows of them, and thus you
+        // pass nonzeroRows == C.rows
+        cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
+
+        // now copy the result back to C.
+        tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
+    }
+    // Parameter wrapper types used by the Convolve fixture for the kernel size and the ccorr switch.
+    IMPLEMENT_PARAM_CLASS(KSize, int)
+    IMPLEMENT_PARAM_CLASS(Ccorr, bool)
+}
+
+PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int ksize;
+    bool ccorr;
+    // Copies the four test parameters into the members above and selects the device under test.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        ksize = GET_PARAM(2);
+        ccorr = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Convolve, Accuracy)
+{ // cv::gpu::convolve with a square ksize x ksize kernel must match the convolveDFT reference within 1e-1.
+    cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
+    cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::convolve(loadMat(src), loadMat(kernel), dst, ccorr);
+
+    cv::Mat dst_gold;
+    convolveDFT(src, kernel, dst_gold, ccorr);
+
+    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Convolve, testing::Combine( // one test per combination of the value sets below
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(KSize(3), KSize(7), KSize(11), KSize(17), KSize(19), KSize(23), KSize(45)),
+    testing::Values(Ccorr(false), Ccorr(true))));
+
+#endif // HAVE_CUBLAS -- NOTE(review): this guard opens just before the GEMM test, so the Integral, MulSpectrums, Dft and Convolve tests above are also compiled out without CUBLAS; confirm that is intended
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuarithm/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp
index aea7086a8..45f796dc5 100644
--- a/modules/gpuarithm/test/test_core.cpp
+++ b/modules/gpuarithm/test/test_core.cpp
@@ -178,2274 +178,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Split, testing::Combine(
     testing::Values(1, 2, 3, 4),
     WHOLE_SUBMAT));
 
-////////////////////////////////////////////////////////////////////////////////
-// Add_Array
-
-PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    int channels;
-    bool useRoi;
-
-    int stype;
-    int dtype;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        channels = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        stype = CV_MAKE_TYPE(depth.first, channels);
-        dtype = CV_MAKE_TYPE(depth.second, channels);
-    }
-};
-
-GPU_TEST_P(Add_Array, Accuracy)
-{
-    cv::Mat mat1 = randomMat(size, stype);
-    cv::Mat mat2 = randomMat(size, stype);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::gpu::GpuMat(), depth.second);
-
-        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
-        cv::add(mat1, mat2, dst_gold, cv::noArray(), depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Array, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    ALL_CHANNELS,
-    WHOLE_SUBMAT));
-
-PARAM_TEST_CASE(Add_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    bool useRoi;
-
-    int stype;
-    int dtype;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        stype = CV_MAKE_TYPE(depth.first, 1);
-        dtype = CV_MAKE_TYPE(depth.second, 1);
-    }
-};
-
-GPU_TEST_P(Add_Array_Mask, Accuracy)
-{
-    cv::Mat mat1 = randomMat(size, stype);
-    cv::Mat mat2 = randomMat(size, stype);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
-
-        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
-        cv::add(mat1, mat2, dst_gold, mask, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Array_Mask, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Add_Scalar
-
-PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Add_Scalar, WithOutMask)
-{
-    cv::Mat mat = randomMat(size, depth.first);
-    cv::Scalar val = randomScalar(0, 255);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);
-
-        cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
-        cv::add(mat, val, dst_gold, cv::noArray(), depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
-    }
-}
-
-GPU_TEST_P(Add_Scalar, WithMask)
-{
-    cv::Mat mat = randomMat(size, depth.first);
-    cv::Scalar val = randomScalar(0, 255);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
-
-        cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
-        cv::add(mat, val, dst_gold, mask, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Scalar, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Subtract_Array
-
-PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    int channels;
-    bool useRoi;
-
-    int stype;
-    int dtype;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        channels = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        stype = CV_MAKE_TYPE(depth.first, channels);
-        dtype = CV_MAKE_TYPE(depth.second, channels);
-    }
-};
-
-GPU_TEST_P(Subtract_Array, Accuracy)
-{
-    cv::Mat mat1 = randomMat(size, stype);
-    cv::Mat mat2 = randomMat(size, stype);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::gpu::GpuMat(), depth.second);
-
-        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
-        cv::subtract(mat1, mat2, dst_gold, cv::noArray(), depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Array, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    ALL_CHANNELS,
-    WHOLE_SUBMAT));
-
-PARAM_TEST_CASE(Subtract_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    bool useRoi;
-
-    int stype;
-    int dtype;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        stype = CV_MAKE_TYPE(depth.first, 1);
-        dtype = CV_MAKE_TYPE(depth.second, 1);
-    }
-};
-
-GPU_TEST_P(Subtract_Array_Mask, Accuracy)
-{
-    cv::Mat mat1 = randomMat(size, stype);
-    cv::Mat mat2 = randomMat(size, stype);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
-
-        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
-        cv::subtract(mat1, mat2, dst_gold, mask, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Array_Mask, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Subtract_Scalar
-
-PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Subtract_Scalar, WithOutMask)
-{
-    cv::Mat mat = randomMat(size, depth.first);
-    cv::Scalar val = randomScalar(0, 255);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);
-
-        cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
-        cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
-    }
-}
-
-GPU_TEST_P(Subtract_Scalar, WithMask)
-{
-    cv::Mat mat = randomMat(size, depth.first);
-    cv::Scalar val = randomScalar(0, 255);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
-
-        cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
-        cv::subtract(mat, val, dst_gold, mask, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Scalar, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Multiply_Array
-
-PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    int channels;
-    bool useRoi;
-
-    int stype;
-    int dtype;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        channels = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        stype = CV_MAKE_TYPE(depth.first, channels);
-        dtype = CV_MAKE_TYPE(depth.second, channels);
-    }
-};
-
-GPU_TEST_P(Multiply_Array, WithOutScale)
-{
-    cv::Mat mat1 = randomMat(size, stype);
-    cv::Mat mat2 = randomMat(size, stype);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
-
-        cv::Mat dst_gold;
-        cv::multiply(mat1, mat2, dst_gold, 1, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 0.0);
-    }
-}
-
-GPU_TEST_P(Multiply_Array, WithScale)
-{
-    cv::Mat mat1 = randomMat(size, stype);
-    cv::Mat mat2 = randomMat(size, stype);
-    double scale = randomDouble(0.0, 255.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
-
-        cv::Mat dst_gold;
-        cv::multiply(mat1, mat2, dst_gold, scale, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 2.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Array, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    ALL_CHANNELS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Multiply_Array_Special
-
-PARAM_TEST_CASE(Multiply_Array_Special, cv::gpu::DeviceInfo, cv::Size, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Multiply_Array_Special, Case_8UC4x_32FC1)
-{
-    cv::Mat mat1 = randomMat(size, CV_8UC4);
-    cv::Mat mat2 = randomMat(size, CV_32FC1);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_8UC4, useRoi);
-    cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
-
-    cv::Mat h_dst(dst);
-
-    for (int y = 0; y < h_dst.rows; ++y)
-    {
-        const cv::Vec4b* mat1_row = mat1.ptr<cv::Vec4b>(y);
-        const float* mat2_row = mat2.ptr<float>(y);
-        const cv::Vec4b* dst_row = h_dst.ptr<cv::Vec4b>(y);
-
-        for (int x = 0; x < h_dst.cols; ++x)
-        {
-            cv::Vec4b val1 = mat1_row[x];
-            float val2 = mat2_row[x];
-            cv::Vec4b actual = dst_row[x];
-
-            cv::Vec4b gold;
-
-            gold[0] = cv::saturate_cast<uchar>(val1[0] * val2);
-            gold[1] = cv::saturate_cast<uchar>(val1[1] * val2);
-            gold[2] = cv::saturate_cast<uchar>(val1[2] * val2);
-            gold[3] = cv::saturate_cast<uchar>(val1[3] * val2);
-
-            ASSERT_LE(std::abs(gold[0] - actual[0]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-        }
-    }
-}
-
-GPU_TEST_P(Multiply_Array_Special, Case_16SC4x_32FC1)
-{
-    cv::Mat mat1 = randomMat(size, CV_16SC4);
-    cv::Mat mat2 = randomMat(size, CV_32FC1);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_16SC4, useRoi);
-    cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
-
-    cv::Mat h_dst(dst);
-
-    for (int y = 0; y < h_dst.rows; ++y)
-    {
-        const cv::Vec4s* mat1_row = mat1.ptr<cv::Vec4s>(y);
-        const float* mat2_row = mat2.ptr<float>(y);
-        const cv::Vec4s* dst_row = h_dst.ptr<cv::Vec4s>(y);
-
-        for (int x = 0; x < h_dst.cols; ++x)
-        {
-            cv::Vec4s val1 = mat1_row[x];
-            float val2 = mat2_row[x];
-            cv::Vec4s actual = dst_row[x];
-
-            cv::Vec4s gold;
-
-            gold[0] = cv::saturate_cast<short>(val1[0] * val2);
-            gold[1] = cv::saturate_cast<short>(val1[1] * val2);
-            gold[2] = cv::saturate_cast<short>(val1[2] * val2);
-            gold[3] = cv::saturate_cast<short>(val1[3] * val2);
-
-            ASSERT_LE(std::abs(gold[0] - actual[0]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-        }
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Array_Special, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Multiply_Scalar
-
-PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Multiply_Scalar, WithOutScale)
-{
-    cv::Mat mat = randomMat(size, depth.first);
-    cv::Scalar val = randomScalar(0, 255);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(loadMat(mat), val, dst, 1, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::multiply(loadMat(mat, useRoi), val, dst, 1, depth.second);
-
-        cv::Mat dst_gold;
-        cv::multiply(mat, val, dst_gold, 1, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
-    }
-}
-
-
-GPU_TEST_P(Multiply_Scalar, WithScale)
-{
-    cv::Mat mat = randomMat(size, depth.first);
-    cv::Scalar val = randomScalar(0, 255);
-    double scale = randomDouble(0.0, 255.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(loadMat(mat), val, dst, scale, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second);
-
-        cv::Mat dst_gold;
-        cv::multiply(mat, val, dst_gold, scale, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Divide_Array
-
-PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    int channels;
-    bool useRoi;
-
-    int stype;
-    int dtype;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        channels = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        stype = CV_MAKE_TYPE(depth.first, channels);
-        dtype = CV_MAKE_TYPE(depth.second, channels);
-    }
-};
-
-GPU_TEST_P(Divide_Array, WithOutScale)
-{
-    cv::Mat mat1 = randomMat(size, stype);
-    cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
-
-        cv::Mat dst_gold;
-        cv::divide(mat1, mat2, dst_gold, 1, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
-    }
-}
-
-GPU_TEST_P(Divide_Array, WithScale)
-{
-    cv::Mat mat1 = randomMat(size, stype);
-    cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);
-    double scale = randomDouble(0.0, 255.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
-
-        cv::Mat dst_gold;
-        cv::divide(mat1, mat2, dst_gold, scale, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 1.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Array, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    ALL_CHANNELS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Divide_Array_Special
-
-PARAM_TEST_CASE(Divide_Array_Special, cv::gpu::DeviceInfo, cv::Size, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Divide_Array_Special, Case_8UC4x_32FC1)
-{
-    cv::Mat mat1 = randomMat(size, CV_8UC4);
-    cv::Mat mat2 = randomMat(size, CV_32FC1, 1.0, 255.0);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_8UC4, useRoi);
-    cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
-
-    cv::Mat h_dst(dst);
-
-    for (int y = 0; y < h_dst.rows; ++y)
-    {
-        const cv::Vec4b* mat1_row = mat1.ptr<cv::Vec4b>(y);
-        const float* mat2_row = mat2.ptr<float>(y);
-        const cv::Vec4b* dst_row = h_dst.ptr<cv::Vec4b>(y);
-
-        for (int x = 0; x < h_dst.cols; ++x)
-        {
-            cv::Vec4b val1 = mat1_row[x];
-            float val2 = mat2_row[x];
-            cv::Vec4b actual = dst_row[x];
-
-            cv::Vec4b gold;
-
-            gold[0] = cv::saturate_cast<uchar>(val1[0] / val2);
-            gold[1] = cv::saturate_cast<uchar>(val1[1] / val2);
-            gold[2] = cv::saturate_cast<uchar>(val1[2] / val2);
-            gold[3] = cv::saturate_cast<uchar>(val1[3] / val2);
-
-            ASSERT_LE(std::abs(gold[0] - actual[0]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-        }
-    }
-}
-
-GPU_TEST_P(Divide_Array_Special, Case_16SC4x_32FC1)
-{
-    cv::Mat mat1 = randomMat(size, CV_16SC4);
-    cv::Mat mat2 = randomMat(size, CV_32FC1, 1.0, 255.0);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_16SC4, useRoi);
-    cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
-
-    cv::Mat h_dst(dst);
-
-    for (int y = 0; y < h_dst.rows; ++y)
-    {
-        const cv::Vec4s* mat1_row = mat1.ptr<cv::Vec4s>(y);
-        const float* mat2_row = mat2.ptr<float>(y);
-        const cv::Vec4s* dst_row = h_dst.ptr<cv::Vec4s>(y);
-
-        for (int x = 0; x < h_dst.cols; ++x)
-        {
-            cv::Vec4s val1 = mat1_row[x];
-            float val2 = mat2_row[x];
-            cv::Vec4s actual = dst_row[x];
-
-            cv::Vec4s gold;
-
-            gold[0] = cv::saturate_cast<short>(val1[0] / val2);
-            gold[1] = cv::saturate_cast<short>(val1[1] / val2);
-            gold[2] = cv::saturate_cast<short>(val1[2] / val2);
-            gold[3] = cv::saturate_cast<short>(val1[3] / val2);
-
-            ASSERT_LE(std::abs(gold[0] - actual[0]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
-        }
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Array_Special, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Divide_Scalar
-
-PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Divide_Scalar, WithOutScale)
-{
-    cv::Mat mat = randomMat(size, depth.first);
-    cv::Scalar val = randomScalar(1.0, 255.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(loadMat(mat), val, dst, 1, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::divide(loadMat(mat, useRoi), val, dst, 1, depth.second);
-
-        cv::Mat dst_gold;
-        cv::divide(mat, val, dst_gold, 1, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
-    }
-}
-
-GPU_TEST_P(Divide_Scalar, WithScale)
-{
-    cv::Mat mat = randomMat(size, depth.first);
-    cv::Scalar val = randomScalar(1.0, 255.0);
-    double scale = randomDouble(0.0, 255.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(loadMat(mat), val, dst, scale, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second);
-
-        cv::Mat dst_gold;
-        cv::divide(mat, val, dst_gold, scale, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 1.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Divide_Scalar_Inv
-
-PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    std::pair<MatDepth, MatDepth> depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Divide_Scalar_Inv, Accuracy)
-{
-    double scale = randomDouble(0.0, 255.0);
-    cv::Mat mat = randomMat(size, depth.first, 1.0, 255.0);
-
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(scale, loadMat(mat), dst, depth.second);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second);
-
-        cv::Mat dst_gold;
-        cv::divide(scale, mat, dst_gold, depth.second);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar_Inv, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    DEPTH_PAIRS,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// AbsDiff
-
-PARAM_TEST_CASE(AbsDiff, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(AbsDiff, Array)
-{
-    cv::Mat src1 = randomMat(size, depth);
-    cv::Mat src2 = randomMat(size, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::absdiff(loadMat(src1), loadMat(src2), dst);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::absdiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
-
-        cv::Mat dst_gold;
-        cv::absdiff(src1, src2, dst_gold);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-    }
-}
-
-GPU_TEST_P(AbsDiff, Scalar)
-{
-    cv::Mat src = randomMat(size, depth);
-    cv::Scalar val = randomScalar(0.0, 255.0);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::absdiff(loadMat(src), val, dst);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::absdiff(loadMat(src, useRoi), val, dst);
-
-        cv::Mat dst_gold;
-        cv::absdiff(src, val, dst_gold);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth <= CV_32F ? 1.0 : 1e-5);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, AbsDiff, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Abs
-
-PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Abs, Accuracy)
-{
-    cv::Mat src = randomMat(size, depth);
-
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::abs(loadMat(src, useRoi), dst);
-
-    cv::Mat dst_gold = cv::abs(src);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Abs, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Sqr
-
-PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Sqr, Accuracy)
-{
-    cv::Mat src = randomMat(size, depth, 0, depth == CV_8U ? 16 : 255);
-
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::sqr(loadMat(src, useRoi), dst);
-
-    cv::Mat dst_gold;
-    cv::multiply(src, src, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sqr, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U),
-                    MatDepth(CV_16U),
-                    MatDepth(CV_16S),
-                    MatDepth(CV_32F)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Sqrt
-
-namespace
-{
-    template <typename T> void sqrtImpl(const cv::Mat& src, cv::Mat& dst)
-    {
-        dst.create(src.size(), src.type());
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-                dst.at<T>(y, x) = static_cast<T>(std::sqrt(static_cast<float>(src.at<T>(y, x))));
-        }
-    }
-
-    void sqrtGold(const cv::Mat& src, cv::Mat& dst)
-    {
-        typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);
-
-        const func_t funcs[] =
-        {
-            sqrtImpl<uchar>, sqrtImpl<schar>, sqrtImpl<ushort>, sqrtImpl<short>,
-            sqrtImpl<int>, sqrtImpl<float>
-        };
-
-        funcs[src.depth()](src, dst);
-    }
-}
-
-PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Sqrt, Accuracy)
-{
-    cv::Mat src = randomMat(size, depth);
-
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::sqrt(loadMat(src, useRoi), dst);
-
-    cv::Mat dst_gold;
-    sqrtGold(src, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sqrt, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U),
-                    MatDepth(CV_16U),
-                    MatDepth(CV_16S),
-                    MatDepth(CV_32F)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Log
-
-namespace
-{
-    template <typename T> void logImpl(const cv::Mat& src, cv::Mat& dst)
-    {
-        dst.create(src.size(), src.type());
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-                dst.at<T>(y, x) = static_cast<T>(std::log(static_cast<float>(src.at<T>(y, x))));
-        }
-    }
-
-    void logGold(const cv::Mat& src, cv::Mat& dst)
-    {
-        typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);
-
-        const func_t funcs[] =
-        {
-            logImpl<uchar>, logImpl<schar>, logImpl<ushort>, logImpl<short>,
-            logImpl<int>, logImpl<float>
-        };
-
-        funcs[src.depth()](src, dst);
-    }
-}
-
-PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Log, Accuracy)
-{
-    cv::Mat src = randomMat(size, depth, 1.0, 255.0);
-
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::log(loadMat(src, useRoi), dst);
-
-    cv::Mat dst_gold;
-    logGold(src, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-6);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Log, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U),
-                    MatDepth(CV_16U),
-                    MatDepth(CV_16S),
-                    MatDepth(CV_32F)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Exp
-
-namespace
-{
-    template <typename T> void expImpl(const cv::Mat& src, cv::Mat& dst)
-    {
-        dst.create(src.size(), src.type());
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-                dst.at<T>(y, x) = cv::saturate_cast<T>(static_cast<int>(std::exp(static_cast<float>(src.at<T>(y, x)))));
-        }
-    }
-    void expImpl_float(const cv::Mat& src, cv::Mat& dst)
-    {
-        dst.create(src.size(), src.type());
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-                dst.at<float>(y, x) = std::exp(static_cast<float>(src.at<float>(y, x)));
-        }
-    }
-
-    void expGold(const cv::Mat& src, cv::Mat& dst)
-    {
-        typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);
-
-        const func_t funcs[] =
-        {
-            expImpl<uchar>, expImpl<schar>, expImpl<ushort>, expImpl<short>,
-            expImpl<int>, expImpl_float
-        };
-
-        funcs[src.depth()](src, dst);
-    }
-}
-
-PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Exp, Accuracy)
-{
-    cv::Mat src = randomMat(size, depth, 0.0, 10.0);
-
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::exp(loadMat(src, useRoi), dst);
-
-    cv::Mat dst_gold;
-    expGold(src, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-2);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Exp, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U),
-                    MatDepth(CV_16U),
-                    MatDepth(CV_16S),
-                    MatDepth(CV_32F)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Compare_Array
-
-CV_ENUM(CmpCode, CMP_EQ, CMP_NE, CMP_GT, CMP_GE, CMP_LT, CMP_LE)
-
-PARAM_TEST_CASE(Compare_Array, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    int cmp_code;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        cmp_code = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Compare_Array, Accuracy)
-{
-    cv::Mat src1 = randomMat(size, depth);
-    cv::Mat src2 = randomMat(size, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::compare(loadMat(src1), loadMat(src2), dst, cmp_code);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, CV_8UC1, useRoi);
-        cv::gpu::compare(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, cmp_code);
-
-        cv::Mat dst_gold;
-        cv::compare(src1, src2, dst_gold, cmp_code);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Compare_Array, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    CmpCode::all(),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Compare_Scalar
-
-namespace
-{
-    template <template <typename> class Op, typename T>
-    void compareScalarImpl(const cv::Mat& src, cv::Scalar sc, cv::Mat& dst)
-    {
-        Op<T> op;
-
-        const int cn = src.channels();
-
-        dst.create(src.size(), CV_MAKE_TYPE(CV_8U, cn));
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-            {
-                for (int c = 0; c < cn; ++c)
-                {
-                    T src_val = src.at<T>(y, x * cn + c);
-                    T sc_val = cv::saturate_cast<T>(sc.val[c]);
-                    dst.at<uchar>(y, x * cn + c) = static_cast<uchar>(static_cast<int>(op(src_val, sc_val)) * 255);
-                }
-            }
-        }
-    }
-
-    void compareScalarGold(const cv::Mat& src, cv::Scalar sc, cv::Mat& dst, int cmpop)
-    {
-        typedef void (*func_t)(const cv::Mat& src, cv::Scalar sc, cv::Mat& dst);
-        static const func_t funcs[7][6] =
-        {
-            {compareScalarImpl<std::equal_to, unsigned char> , compareScalarImpl<std::greater, unsigned char> , compareScalarImpl<std::greater_equal, unsigned char> , compareScalarImpl<std::less, unsigned char> , compareScalarImpl<std::less_equal, unsigned char> , compareScalarImpl<std::not_equal_to, unsigned char> },
-            {compareScalarImpl<std::equal_to, signed char>   , compareScalarImpl<std::greater, signed char>   , compareScalarImpl<std::greater_equal, signed char>   , compareScalarImpl<std::less, signed char>   , compareScalarImpl<std::less_equal, signed char>   , compareScalarImpl<std::not_equal_to, signed char>   },
-            {compareScalarImpl<std::equal_to, unsigned short>, compareScalarImpl<std::greater, unsigned short>, compareScalarImpl<std::greater_equal, unsigned short>, compareScalarImpl<std::less, unsigned short>, compareScalarImpl<std::less_equal, unsigned short>, compareScalarImpl<std::not_equal_to, unsigned short>},
-            {compareScalarImpl<std::equal_to, short>         , compareScalarImpl<std::greater, short>         , compareScalarImpl<std::greater_equal, short>         , compareScalarImpl<std::less, short>         , compareScalarImpl<std::less_equal, short>         , compareScalarImpl<std::not_equal_to, short>         },
-            {compareScalarImpl<std::equal_to, int>           , compareScalarImpl<std::greater, int>           , compareScalarImpl<std::greater_equal, int>           , compareScalarImpl<std::less, int>           , compareScalarImpl<std::less_equal, int>           , compareScalarImpl<std::not_equal_to, int>           },
-            {compareScalarImpl<std::equal_to, float>         , compareScalarImpl<std::greater, float>         , compareScalarImpl<std::greater_equal, float>         , compareScalarImpl<std::less, float>         , compareScalarImpl<std::less_equal, float>         , compareScalarImpl<std::not_equal_to, float>         },
-            {compareScalarImpl<std::equal_to, double>        , compareScalarImpl<std::greater, double>        , compareScalarImpl<std::greater_equal, double>        , compareScalarImpl<std::less, double>        , compareScalarImpl<std::less_equal, double>        , compareScalarImpl<std::not_equal_to, double>        }
-        };
-
-        funcs[src.depth()][cmpop](src, sc, dst);
-    }
-}
-
-PARAM_TEST_CASE(Compare_Scalar, cv::gpu::DeviceInfo, cv::Size, MatType, CmpCode, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    int cmp_code;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-        cmp_code = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Compare_Scalar, Accuracy)
-{
-    cv::Mat src = randomMat(size, type);
-    cv::Scalar sc = randomScalar(0.0, 255.0);
-
-    if (src.depth() < CV_32F)
-    {
-        sc.val[0] = cvRound(sc.val[0]);
-        sc.val[1] = cvRound(sc.val[1]);
-        sc.val[2] = cvRound(sc.val[2]);
-        sc.val[3] = cvRound(sc.val[3]);
-    }
-
-    if (src.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::compare(loadMat(src), sc, dst, cmp_code);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, CV_MAKE_TYPE(CV_8U, src.channels()), useRoi);
-
-        cv::gpu::compare(loadMat(src, useRoi), sc, dst, cmp_code);
-
-        cv::Mat dst_gold;
-        compareScalarGold(src, sc, dst_gold, cmp_code);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Compare_Scalar, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    TYPES(CV_8U, CV_64F, 1, 4),
-    CmpCode::all(),
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// Bitwise_Array
-
-PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-
-    cv::Mat src1;
-    cv::Mat src2;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        src1 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
-        src2 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
-    }
-};
-
-GPU_TEST_P(Bitwise_Array, Not)
-{
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_not(loadMat(src1), dst);
-
-    cv::Mat dst_gold = ~src1;
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-GPU_TEST_P(Bitwise_Array, Or)
-{
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_or(loadMat(src1), loadMat(src2), dst);
-
-    cv::Mat dst_gold = src1 | src2;
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-GPU_TEST_P(Bitwise_Array, And)
-{
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_and(loadMat(src1), loadMat(src2), dst);
-
-    cv::Mat dst_gold = src1 & src2;
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-GPU_TEST_P(Bitwise_Array, Xor)
-{
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_xor(loadMat(src1), loadMat(src2), dst);
-
-    cv::Mat dst_gold = src1 ^ src2;
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Bitwise_Array, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    TYPES(CV_8U, CV_32S, 1, 4)));
-
-//////////////////////////////////////////////////////////////////////////////
-// Bitwise_Scalar
-
-PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    int channels;
-
-    cv::Mat src;
-    cv::Scalar val;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        channels = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        src = randomMat(size, CV_MAKE_TYPE(depth, channels));
-        cv::Scalar_<int> ival = randomScalar(0.0, std::numeric_limits<int>::max());
-        val = ival;
-    }
-};
-
-GPU_TEST_P(Bitwise_Scalar, Or)
-{
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_or(loadMat(src), val, dst);
-
-    cv::Mat dst_gold;
-    cv::bitwise_or(src, val, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-GPU_TEST_P(Bitwise_Scalar, And)
-{
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_and(loadMat(src), val, dst);
-
-    cv::Mat dst_gold;
-    cv::bitwise_and(src, val, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-GPU_TEST_P(Bitwise_Scalar, Xor)
-{
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_xor(loadMat(src), val, dst);
-
-    cv::Mat dst_gold;
-    cv::bitwise_xor(src, val, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Bitwise_Scalar, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)),
-    IMAGE_CHANNELS));
-
-//////////////////////////////////////////////////////////////////////////////
-// RShift
-
-namespace
-{
-    template <typename T> void rhiftImpl(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
-    {
-        const int cn = src.channels();
-
-        dst.create(src.size(), src.type());
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-            {
-                for (int c = 0; c < cn; ++c)
-                    dst.at<T>(y, x * cn + c) = src.at<T>(y, x * cn + c) >> val.val[c];
-            }
-        }
-    }
-
-    void rhiftGold(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
-    {
-        typedef void (*func_t)(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst);
-
-        const func_t funcs[] =
-        {
-            rhiftImpl<uchar>, rhiftImpl<schar>, rhiftImpl<ushort>, rhiftImpl<short>, rhiftImpl<int>
-        };
-
-        funcs[src.depth()](src, val, dst);
-    }
-}
-
-PARAM_TEST_CASE(RShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    int channels;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        channels = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(RShift, Accuracy)
-{
-    int type = CV_MAKE_TYPE(depth, channels);
-    cv::Mat src = randomMat(size, type);
-    cv::Scalar_<int> val = randomScalar(0.0, 8.0);
-
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::rshift(loadMat(src, useRoi), val, dst);
-
-    cv::Mat dst_gold;
-    rhiftGold(src, val, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, RShift, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U),
-                    MatDepth(CV_8S),
-                    MatDepth(CV_16U),
-                    MatDepth(CV_16S),
-                    MatDepth(CV_32S)),
-    IMAGE_CHANNELS,
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// LShift
-
-namespace
-{
-    template <typename T> void lhiftImpl(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
-    {
-        const int cn = src.channels();
-
-        dst.create(src.size(), src.type());
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-            {
-                for (int c = 0; c < cn; ++c)
-                    dst.at<T>(y, x * cn + c) = src.at<T>(y, x * cn + c) << val.val[c];
-            }
-        }
-    }
-
-    void lhiftGold(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
-    {
-        typedef void (*func_t)(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst);
-
-        const func_t funcs[] =
-        {
-            lhiftImpl<uchar>, lhiftImpl<schar>, lhiftImpl<ushort>, lhiftImpl<short>, lhiftImpl<int>
-        };
-
-        funcs[src.depth()](src, val, dst);
-    }
-}
-
-PARAM_TEST_CASE(LShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    int channels;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        channels = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(LShift, Accuracy)
-{
-    int type = CV_MAKE_TYPE(depth, channels);
-    cv::Mat src = randomMat(size, type);
-    cv::Scalar_<int> val = randomScalar(0.0, 8.0);
-
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::lshift(loadMat(src, useRoi), val, dst);
-
-    cv::Mat dst_gold;
-    lhiftGold(src, val, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, LShift, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)),
-    IMAGE_CHANNELS,
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// Min
-
-PARAM_TEST_CASE(Min, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Min, Array)
-{
-    cv::Mat src1 = randomMat(size, depth);
-    cv::Mat src2 = randomMat(size, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::min(loadMat(src1), loadMat(src2), dst);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::min(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
-
-        cv::Mat dst_gold = cv::min(src1, src2);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-    }
-}
-
-GPU_TEST_P(Min, Scalar)
-{
-    cv::Mat src = randomMat(size, depth);
-    double val = randomDouble(0.0, 255.0);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::min(loadMat(src), val, dst);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::min(loadMat(src, useRoi), val, dst);
-
-        cv::Mat dst_gold = cv::min(src, val);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Min, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// Max
-
-PARAM_TEST_CASE(Max, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Max, Array)
-{
-    cv::Mat src1 = randomMat(size, depth);
-    cv::Mat src2 = randomMat(size, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::max(loadMat(src1), loadMat(src2), dst);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::max(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
-
-        cv::Mat dst_gold = cv::max(src1, src2);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-    }
-}
-
-GPU_TEST_P(Max, Scalar)
-{
-    cv::Mat src = randomMat(size, depth);
-    double val = randomDouble(0.0, 255.0);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::max(loadMat(src), val, dst);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::max(loadMat(src, useRoi), val, dst);
-
-        cv::Mat dst_gold = cv::max(src, val);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Max, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Pow
-
-PARAM_TEST_CASE(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Pow, Accuracy)
-{
-    cv::Mat src = randomMat(size, depth, 0.0, 10.0);
-    double power = randomDouble(2.0, 4.0);
-
-    if (src.depth() < CV_32F)
-        power = static_cast<int>(power);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::pow(loadMat(src), power, dst);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::pow(loadMat(src, useRoi), power, dst);
-
-        cv::Mat dst_gold;
-        cv::pow(src, power, dst_gold);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 0.0 : 1e-1);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Pow, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// AddWeighted
-
-PARAM_TEST_CASE(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth1;
-    int depth2;
-    int dst_depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth1 = GET_PARAM(2);
-        depth2 = GET_PARAM(3);
-        dst_depth = GET_PARAM(4);
-        useRoi = GET_PARAM(5);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(AddWeighted, Accuracy)
-{
-    cv::Mat src1 = randomMat(size, depth1);
-    cv::Mat src2 = randomMat(size, depth2);
-    double alpha = randomDouble(-10.0, 10.0);
-    double beta = randomDouble(-10.0, 10.0);
-    double gamma = randomDouble(-10.0, 10.0);
-
-    if ((depth1 == CV_64F || depth2 == CV_64F || dst_depth == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::addWeighted(loadMat(src1), alpha, loadMat(src2), beta, gamma, dst, dst_depth);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, dst_depth, useRoi);
-        cv::gpu::addWeighted(loadMat(src1, useRoi), alpha, loadMat(src2, useRoi), beta, gamma, dst, dst_depth);
-
-        cv::Mat dst_gold;
-        cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dst_depth);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-3);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, AddWeighted, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    ALL_DEPTH,
-    ALL_DEPTH,
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// GEMM
-
-#ifdef HAVE_CUBLAS
-
-CV_FLAGS(GemmFlags, 0, GEMM_1_T, GEMM_2_T, GEMM_3_T);
-#define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
-
-PARAM_TEST_CASE(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    int flags;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-        flags = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(GEMM, Accuracy)
-{
-    cv::Mat src1 = randomMat(size, type, -10.0, 10.0);
-    cv::Mat src2 = randomMat(size, type, -10.0, 10.0);
-    cv::Mat src3 = randomMat(size, type, -10.0, 10.0);
-    double alpha = randomDouble(-10.0, 10.0);
-    double beta = randomDouble(-10.0, 10.0);
-
-    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else if (type == CV_64FC2 && flags != 0)
-    {
-        try
-        {
-            cv::gpu::GpuMat dst;
-            cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-        cv::gpu::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
-
-        cv::Mat dst_gold;
-        cv::gemm(src1, src2, alpha, src3, beta, dst_gold, flags);
-
-        EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1e-10);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, GEMM, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_32FC1), MatType(CV_32FC2), MatType(CV_64FC1), MatType(CV_64FC2)),
-    ALL_GEMM_FLAGS,
-    WHOLE_SUBMAT));
-
-#endif // HAVE_CUBLAS
-
 ////////////////////////////////////////////////////////////////////////////////
 // Transpose
 
@@ -2620,1265 +352,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, LUT, testing::Combine(
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3)),
     WHOLE_SUBMAT));
 
-////////////////////////////////////////////////////////////////////////////////
-// Magnitude
-
-PARAM_TEST_CASE(Magnitude, cv::gpu::DeviceInfo, cv::Size, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Magnitude, NPP)
-{
-    cv::Mat src = randomMat(size, CV_32FC2);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::magnitude(loadMat(src, useRoi), dst);
-
-    cv::Mat arr[2];
-    cv::split(src, arr);
-    cv::Mat dst_gold;
-    cv::magnitude(arr[0], arr[1], dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
-}
-
-GPU_TEST_P(Magnitude, Sqr_NPP)
-{
-    cv::Mat src = randomMat(size, CV_32FC2);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::magnitudeSqr(loadMat(src, useRoi), dst);
-
-    cv::Mat arr[2];
-    cv::split(src, arr);
-    cv::Mat dst_gold;
-    cv::magnitude(arr[0], arr[1], dst_gold);
-    cv::multiply(dst_gold, dst_gold, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-1);
-}
-
-GPU_TEST_P(Magnitude, Accuracy)
-{
-    cv::Mat x = randomMat(size, CV_32FC1);
-    cv::Mat y = randomMat(size, CV_32FC1);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::magnitude(loadMat(x, useRoi), loadMat(y, useRoi), dst);
-
-    cv::Mat dst_gold;
-    cv::magnitude(x, y, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
-}
-
-GPU_TEST_P(Magnitude, Sqr_Accuracy)
-{
-    cv::Mat x = randomMat(size, CV_32FC1);
-    cv::Mat y = randomMat(size, CV_32FC1);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::magnitudeSqr(loadMat(x, useRoi), loadMat(y, useRoi), dst);
-
-    cv::Mat dst_gold;
-    cv::magnitude(x, y, dst_gold);
-    cv::multiply(dst_gold, dst_gold, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-1);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Magnitude, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Phase
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(AngleInDegrees, bool)
-}
-
-PARAM_TEST_CASE(Phase, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool angleInDegrees;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        angleInDegrees = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Phase, Accuracy)
-{
-    cv::Mat x = randomMat(size, CV_32FC1);
-    cv::Mat y = randomMat(size, CV_32FC1);
-
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::phase(loadMat(x, useRoi), loadMat(y, useRoi), dst, angleInDegrees);
-
-    cv::Mat dst_gold;
-    cv::phase(x, y, dst_gold, angleInDegrees);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, angleInDegrees ? 1e-2 : 1e-3);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Phase, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// CartToPolar
-
-PARAM_TEST_CASE(CartToPolar, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool angleInDegrees;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        angleInDegrees = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(CartToPolar, Accuracy)
-{
-    cv::Mat x = randomMat(size, CV_32FC1);
-    cv::Mat y = randomMat(size, CV_32FC1);
-
-    cv::gpu::GpuMat mag = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::GpuMat angle = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::cartToPolar(loadMat(x, useRoi), loadMat(y, useRoi), mag, angle, angleInDegrees);
-
-    cv::Mat mag_gold;
-    cv::Mat angle_gold;
-    cv::cartToPolar(x, y, mag_gold, angle_gold, angleInDegrees);
-
-    EXPECT_MAT_NEAR(mag_gold, mag, 1e-4);
-    EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, CartToPolar, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// polarToCart
-
-PARAM_TEST_CASE(PolarToCart, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool angleInDegrees;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        angleInDegrees = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(PolarToCart, Accuracy)
-{
-    cv::Mat magnitude = randomMat(size, CV_32FC1);
-    cv::Mat angle = randomMat(size, CV_32FC1);
-
-    cv::gpu::GpuMat x = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::GpuMat y = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), x, y, angleInDegrees);
-
-    cv::Mat x_gold;
-    cv::Mat y_gold;
-    cv::polarToCart(magnitude, angle, x_gold, y_gold, angleInDegrees);
-
-    EXPECT_MAT_NEAR(x_gold, x, 1e-4);
-    EXPECT_MAT_NEAR(y_gold, y, 1e-4);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, PolarToCart, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// MeanStdDev
-
-PARAM_TEST_CASE(MeanStdDev, cv::gpu::DeviceInfo, cv::Size, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MeanStdDev, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_8UC1);
-
-    if (!supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_13))
-    {
-        try
-        {
-            cv::Scalar mean;
-            cv::Scalar stddev;
-            cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
-        }
-    }
-    else
-    {
-        cv::Scalar mean;
-        cv::Scalar stddev;
-        cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
-
-        cv::Scalar mean_gold;
-        cv::Scalar stddev_gold;
-        cv::meanStdDev(src, mean_gold, stddev_gold);
-
-        EXPECT_SCALAR_NEAR(mean_gold, mean, 1e-5);
-        EXPECT_SCALAR_NEAR(stddev_gold, stddev, 1e-5);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, MeanStdDev, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// Norm
-
-PARAM_TEST_CASE(Norm, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    int normCode;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        normCode = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Norm, Accuracy)
-{
-    cv::Mat src = randomMat(size, depth);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
-
-    cv::gpu::GpuMat d_buf;
-    double val = cv::gpu::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi), d_buf);
-
-    double val_gold = cv::norm(src, normCode, mask);
-
-    EXPECT_NEAR(val_gold, val, depth < CV_32F ? 0.0 : 1.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Norm, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U),
-                    MatDepth(CV_8S),
-                    MatDepth(CV_16U),
-                    MatDepth(CV_16S),
-                    MatDepth(CV_32S),
-                    MatDepth(CV_32F)),
-    testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// normDiff
-
-PARAM_TEST_CASE(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormCode, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int normCode;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        normCode = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(NormDiff, Accuracy)
-{
-    cv::Mat src1 = randomMat(size, CV_8UC1);
-    cv::Mat src2 = randomMat(size, CV_8UC1);
-
-    double val = cv::gpu::norm(loadMat(src1, useRoi), loadMat(src2, useRoi), normCode);
-
-    double val_gold = cv::norm(src1, src2, normCode);
-
-    EXPECT_NEAR(val_gold, val, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, NormDiff, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)),
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// Sum
-
-namespace
-{
-    template <typename T>
-    cv::Scalar absSumImpl(const cv::Mat& src)
-    {
-        const int cn = src.channels();
-
-        cv::Scalar sum = cv::Scalar::all(0);
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-            {
-                for (int c = 0; c < cn; ++c)
-                    sum[c] += std::abs(src.at<T>(y, x * cn + c));
-            }
-        }
-
-        return sum;
-    }
-
-    cv::Scalar absSumGold(const cv::Mat& src)
-    {
-        typedef cv::Scalar (*func_t)(const cv::Mat& src);
-
-        static const func_t funcs[] =
-        {
-            absSumImpl<uchar>,
-            absSumImpl<schar>,
-            absSumImpl<ushort>,
-            absSumImpl<short>,
-            absSumImpl<int>,
-            absSumImpl<float>,
-            absSumImpl<double>
-        };
-
-        return funcs[src.depth()](src);
-    }
-
-    template <typename T>
-    cv::Scalar sqrSumImpl(const cv::Mat& src)
-    {
-        const int cn = src.channels();
-
-        cv::Scalar sum = cv::Scalar::all(0);
-
-        for (int y = 0; y < src.rows; ++y)
-        {
-            for (int x = 0; x < src.cols; ++x)
-            {
-                for (int c = 0; c < cn; ++c)
-                {
-                    const T val = src.at<T>(y, x * cn + c);
-                    sum[c] += val * val;
-                }
-            }
-        }
-
-        return sum;
-    }
-
-    cv::Scalar sqrSumGold(const cv::Mat& src)
-    {
-        typedef cv::Scalar (*func_t)(const cv::Mat& src);
-
-        static const func_t funcs[] =
-        {
-            sqrSumImpl<uchar>,
-            sqrSumImpl<schar>,
-            sqrSumImpl<ushort>,
-            sqrSumImpl<short>,
-            sqrSumImpl<int>,
-            sqrSumImpl<float>,
-            sqrSumImpl<double>
-        };
-
-        return funcs[src.depth()](src);
-    }
-}
-
-PARAM_TEST_CASE(Sum, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    bool useRoi;
-
-    cv::Mat src;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        src = randomMat(size, type, -128.0, 128.0);
-    }
-};
-
-GPU_TEST_P(Sum, Simple)
-{
-    cv::Scalar val = cv::gpu::sum(loadMat(src, useRoi));
-
-    cv::Scalar val_gold = cv::sum(src);
-
-    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
-}
-
-GPU_TEST_P(Sum, Abs)
-{
-    cv::Scalar val = cv::gpu::absSum(loadMat(src, useRoi));
-
-    cv::Scalar val_gold = absSumGold(src);
-
-    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
-}
-
-GPU_TEST_P(Sum, Sqr)
-{
-    cv::Scalar val = cv::gpu::sqrSum(loadMat(src, useRoi));
-
-    cv::Scalar val_gold = sqrSumGold(src);
-
-    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sum, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    TYPES(CV_8U, CV_64F, 1, 4),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// MinMax
-
-PARAM_TEST_CASE(MinMax, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MinMax, WithoutMask)
-{
-    cv::Mat src = randomMat(size, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            double minVal, maxVal;
-            cv::gpu::minMax(loadMat(src), &minVal, &maxVal);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        double minVal, maxVal;
-        cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal);
-
-        double minVal_gold, maxVal_gold;
-        minMaxLocGold(src, &minVal_gold, &maxVal_gold);
-
-        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
-    }
-}
-
-GPU_TEST_P(MinMax, WithMask)
-{
-    cv::Mat src = randomMat(size, depth);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            double minVal, maxVal;
-            cv::gpu::minMax(loadMat(src), &minVal, &maxVal, loadMat(mask));
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        double minVal, maxVal;
-        cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal, loadMat(mask, useRoi));
-
-        double minVal_gold, maxVal_gold;
-        minMaxLocGold(src, &minVal_gold, &maxVal_gold, 0, 0, mask);
-
-        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
-    }
-}
-
-GPU_TEST_P(MinMax, NullPtr)
-{
-    cv::Mat src = randomMat(size, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            double minVal, maxVal;
-            cv::gpu::minMax(loadMat(src), &minVal, 0);
-            cv::gpu::minMax(loadMat(src), 0, &maxVal);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        double minVal, maxVal;
-        cv::gpu::minMax(loadMat(src, useRoi), &minVal, 0);
-        cv::gpu::minMax(loadMat(src, useRoi), 0, &maxVal);
-
-        double minVal_gold, maxVal_gold;
-        minMaxLocGold(src, &minVal_gold, &maxVal_gold, 0, 0);
-
-        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, MinMax, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// MinMaxLoc
-
-namespace
-{
-    template <typename T>
-    void expectEqualImpl(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
-    {
-        EXPECT_EQ(src.at<T>(loc_gold.y, loc_gold.x), src.at<T>(loc.y, loc.x));
-    }
-
-    void expectEqual(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
-    {
-        typedef void (*func_t)(const cv::Mat& src, cv::Point loc_gold, cv::Point loc);
-
-        static const func_t funcs[] =
-        {
-            expectEqualImpl<uchar>,
-            expectEqualImpl<schar>,
-            expectEqualImpl<ushort>,
-            expectEqualImpl<short>,
-            expectEqualImpl<int>,
-            expectEqualImpl<float>,
-            expectEqualImpl<double>
-        };
-
-        funcs[src.depth()](src, loc_gold, loc);
-    }
-}
-
-PARAM_TEST_CASE(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MinMaxLoc, WithoutMask)
-{
-    cv::Mat src = randomMat(size, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            double minVal, maxVal;
-            cv::Point minLoc, maxLoc;
-            cv::gpu::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc);
-
-        double minVal_gold, maxVal_gold;
-        cv::Point minLoc_gold, maxLoc_gold;
-        minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);
-
-        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
-
-        expectEqual(src, minLoc_gold, minLoc);
-        expectEqual(src, maxLoc_gold, maxLoc);
-    }
-}
-
-GPU_TEST_P(MinMaxLoc, WithMask)
-{
-    cv::Mat src = randomMat(size, depth);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            double minVal, maxVal;
-            cv::Point minLoc, maxLoc;
-            cv::gpu::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask));
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask, useRoi));
-
-        double minVal_gold, maxVal_gold;
-        cv::Point minLoc_gold, maxLoc_gold;
-        minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold, mask);
-
-        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
-
-        expectEqual(src, minLoc_gold, minLoc);
-        expectEqual(src, maxLoc_gold, maxLoc);
-    }
-}
-
-GPU_TEST_P(MinMaxLoc, NullPtr)
-{
-    cv::Mat src = randomMat(size, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            double minVal, maxVal;
-            cv::Point minLoc, maxLoc;
-            cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
-            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
-            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
-            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);
-
-        double minVal_gold, maxVal_gold;
-        cv::Point minLoc_gold, maxLoc_gold;
-        minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);
-
-        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
-        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
-
-        expectEqual(src, minLoc_gold, minLoc);
-        expectEqual(src, maxLoc_gold, maxLoc);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, MinMaxLoc, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////
-// CountNonZero
-
-PARAM_TEST_CASE(CountNonZero, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    bool useRoi;
-
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(CountNonZero, Accuracy)
-{
-    cv::Mat srcBase = randomMat(size, CV_8U, 0.0, 1.5);
-    cv::Mat src;
-    srcBase.convertTo(src, depth);
-
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
-    {
-        try
-        {
-            cv::gpu::countNonZero(loadMat(src));
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
-        }
-    }
-    else
-    {
-        int val = cv::gpu::countNonZero(loadMat(src, useRoi));
-
-        int val_gold = cv::countNonZero(src);
-
-        ASSERT_EQ(val_gold, val);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, CountNonZero, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// Reduce
-
-CV_ENUM(ReduceCode, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN)
-
-PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, ReduceCode, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int depth;
-    int channels;
-    int reduceOp;
-    bool useRoi;
-
-    int type;
-    int dst_depth;
-    int dst_type;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        depth = GET_PARAM(2);
-        channels = GET_PARAM(3);
-        reduceOp = GET_PARAM(4);
-        useRoi = GET_PARAM(5);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        type = CV_MAKE_TYPE(depth, channels);
-
-        if (reduceOp == cv::REDUCE_MAX || reduceOp == cv::REDUCE_MIN)
-            dst_depth = depth;
-        else if (reduceOp == cv::REDUCE_SUM)
-            dst_depth = depth == CV_8U ? CV_32S : depth < CV_64F ? CV_32F : depth;
-        else
-            dst_depth = depth < CV_32F ? CV_32F : depth;
-
-        dst_type = CV_MAKE_TYPE(dst_depth, channels);
-    }
-
-};
-
-GPU_TEST_P(Reduce, Rows)
-{
-    cv::Mat src = randomMat(size, type);
-
-    cv::gpu::GpuMat dst = createMat(cv::Size(src.cols, 1), dst_type, useRoi);
-    cv::gpu::reduce(loadMat(src, useRoi), dst, 0, reduceOp, dst_depth);
-
-    cv::Mat dst_gold;
-    cv::reduce(src, dst_gold, 0, reduceOp, dst_depth);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 0.0 : 0.02);
-}
-
-GPU_TEST_P(Reduce, Cols)
-{
-    cv::Mat src = randomMat(size, type);
-
-    cv::gpu::GpuMat dst = createMat(cv::Size(src.rows, 1), dst_type, useRoi);
-    cv::gpu::reduce(loadMat(src, useRoi), dst, 1, reduceOp, dst_depth);
-
-    cv::Mat dst_gold;
-    cv::reduce(src, dst_gold, 1, reduceOp, dst_depth);
-    dst_gold.cols = dst_gold.rows;
-    dst_gold.rows = 1;
-    dst_gold.step = dst_gold.cols * dst_gold.elemSize();
-
-    EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 0.0 : 0.02);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Reduce, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatDepth(CV_8U),
-                    MatDepth(CV_16U),
-                    MatDepth(CV_16S),
-                    MatDepth(CV_32F),
-                    MatDepth(CV_64F)),
-    ALL_CHANNELS,
-    ReduceCode::all(),
-    WHOLE_SUBMAT));
-
-//////////////////////////////////////////////////////////////////////////////
-// Normalize
-
-PARAM_TEST_CASE(Normalize, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    int norm_type;
-    bool useRoi;
-
-    double alpha;
-    double beta;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-        norm_type = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        alpha = 1;
-        beta = 0;
-    }
-
-};
-
-GPU_TEST_P(Normalize, WithOutMask)
-{
-    cv::Mat src = randomMat(size, type);
-
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type);
-
-    cv::Mat dst_gold;
-    cv::normalize(src, dst_gold, alpha, beta, norm_type, type);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
-}
-
-GPU_TEST_P(Normalize, WithMask)
-{
-    cv::Mat src = randomMat(size, type);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
-
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    dst.setTo(cv::Scalar::all(0));
-    cv::gpu::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type, loadMat(mask, useRoi));
-
-    cv::Mat dst_gold(size, type);
-    dst_gold.setTo(cv::Scalar::all(0));
-    cv::normalize(src, dst_gold, alpha, beta, norm_type, type, mask);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Normalize, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    ALL_DEPTH,
-    testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF), NormCode(cv::NORM_MINMAX)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////
-// MulSpectrums
-
-CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
-
-PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int flag;
-
-    cv::Mat a, b;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        flag = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        a = randomMat(size, CV_32FC2);
-        b = randomMat(size, CV_32FC2);
-    }
-};
-
-GPU_TEST_P(MulSpectrums, Simple)
-{
-    cv::gpu::GpuMat c;
-    cv::gpu::mulSpectrums(loadMat(a), loadMat(b), c, flag, false);
-
-    cv::Mat c_gold;
-    cv::mulSpectrums(a, b, c_gold, flag, false);
-
-    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
-}
-
-GPU_TEST_P(MulSpectrums, Scaled)
-{
-    float scale = 1.f / size.area();
-
-    cv::gpu::GpuMat c;
-    cv::gpu::mulAndScaleSpectrums(loadMat(a), loadMat(b), c, flag, scale, false);
-
-    cv::Mat c_gold;
-    cv::mulSpectrums(a, b, c_gold, flag, false);
-    c_gold.convertTo(c_gold, c_gold.type(), scale);
-
-    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, MulSpectrums, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
-
-////////////////////////////////////////////////////////////////////////////
-// Dft
-
-struct Dft : testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-namespace
-{
-    void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
-    {
-        SCOPED_TRACE(hint);
-
-        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
-
-        cv::Mat b_gold;
-        cv::dft(a, b_gold, flags);
-
-        cv::gpu::GpuMat d_b;
-        cv::gpu::GpuMat d_b_data;
-        if (inplace)
-        {
-            d_b_data.create(1, a.size().area(), CV_32FC2);
-            d_b = cv::gpu::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
-        }
-        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
-
-        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
-        ASSERT_EQ(CV_32F, d_b.depth());
-        ASSERT_EQ(2, d_b.channels());
-        EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
-    }
-}
-
-GPU_TEST_P(Dft, C2C)
-{
-    int cols = randomInt(2, 100);
-    int rows = randomInt(2, 100);
-
-    for (int i = 0; i < 2; ++i)
-    {
-        bool inplace = i != 0;
-
-        testC2C("no flags", cols, rows, 0, inplace);
-        testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
-        testC2C("no flags 1 0", cols, rows + 1, 0, inplace);
-        testC2C("no flags 1 1", cols + 1, rows, 0, inplace);
-        testC2C("DFT_INVERSE", cols, rows, cv::DFT_INVERSE, inplace);
-        testC2C("DFT_ROWS", cols, rows, cv::DFT_ROWS, inplace);
-        testC2C("single col", 1, rows, 0, inplace);
-        testC2C("single row", cols, 1, 0, inplace);
-        testC2C("single col inversed", 1, rows, cv::DFT_INVERSE, inplace);
-        testC2C("single row inversed", cols, 1, cv::DFT_INVERSE, inplace);
-        testC2C("single row DFT_ROWS", cols, 1, cv::DFT_ROWS, inplace);
-        testC2C("size 1 2", 1, 2, 0, inplace);
-        testC2C("size 2 1", 2, 1, 0, inplace);
-    }
-}
-
-namespace
-{
-    void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
-    {
-        SCOPED_TRACE(hint);
-
-        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC1, 0.0, 10.0);
-
-        cv::gpu::GpuMat d_b, d_c;
-        cv::gpu::GpuMat d_b_data, d_c_data;
-        if (inplace)
-        {
-            if (a.cols == 1)
-            {
-                d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
-                d_b = cv::gpu::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
-            }
-            else
-            {
-                d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
-                d_b = cv::gpu::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
-            }
-            d_c_data.create(1, a.size().area(), CV_32F);
-            d_c = cv::gpu::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
-        }
-
-        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
-        cv::gpu::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
-
-        EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
-        EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());
-        ASSERT_EQ(CV_32F, d_c.depth());
-        ASSERT_EQ(1, d_c.channels());
-
-        cv::Mat c(d_c);
-        EXPECT_MAT_NEAR(a, c, rows * cols * 1e-5);
-    }
-}
-
-GPU_TEST_P(Dft, R2CThenC2R)
-{
-    int cols = randomInt(2, 100);
-    int rows = randomInt(2, 100);
-
-    testR2CThenC2R("sanity", cols, rows, false);
-    testR2CThenC2R("sanity 0 1", cols, rows + 1, false);
-    testR2CThenC2R("sanity 1 0", cols + 1, rows, false);
-    testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, false);
-    testR2CThenC2R("single col", 1, rows, false);
-    testR2CThenC2R("single col 1", 1, rows + 1, false);
-    testR2CThenC2R("single row", cols, 1, false);
-    testR2CThenC2R("single row 1", cols + 1, 1, false);
-
-    testR2CThenC2R("sanity", cols, rows, true);
-    testR2CThenC2R("sanity 0 1", cols, rows + 1, true);
-    testR2CThenC2R("sanity 1 0", cols + 1, rows, true);
-    testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, true);
-    testR2CThenC2R("single row", cols, 1, true);
-    testR2CThenC2R("single row 1", cols + 1, 1, true);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Dft, ALL_DEVICES);
-
-////////////////////////////////////////////////////////
-// Convolve
-
-namespace
-{
-    void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
-    {
-        // reallocate the output array if needed
-        C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
-        cv::Size dftSize;
-
-        // compute the size of DFT transform
-        dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
-        dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);
-
-        // allocate temporary buffers and initialize them with 0s
-        cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));
-        cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));
-
-        // copy A and B to the top-left corners of tempA and tempB, respectively
-        cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));
-        A.copyTo(roiA);
-        cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));
-        B.copyTo(roiB);
-
-        // now transform the padded A & B in-place;
-        // use "nonzeroRows" hint for faster processing
-        cv::dft(tempA, tempA, 0, A.rows);
-        cv::dft(tempB, tempB, 0, B.rows);
-
-        // multiply the spectrums;
-        // the function handles packed spectrum representations well
-        cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);
-
-        // transform the product back from the frequency domain.
-        // Even though all the result rows will be non-zero,
-        // you need only the first C.rows of them, and thus you
-        // pass nonzeroRows == C.rows
-        cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
-
-        // now copy the result back to C.
-        tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
-    }
-
-    IMPLEMENT_PARAM_CLASS(KSize, int)
-    IMPLEMENT_PARAM_CLASS(Ccorr, bool)
-}
-
-PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int ksize;
-    bool ccorr;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        ksize = GET_PARAM(2);
-        ccorr = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Convolve, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
-    cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::convolve(loadMat(src), loadMat(kernel), dst, ccorr);
-
-    cv::Mat dst_gold;
-    convolveDFT(src, kernel, dst_gold, ccorr);
-
-    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Convolve, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(KSize(3), KSize(7), KSize(11), KSize(17), KSize(19), KSize(23), KSize(45)),
-    testing::Values(Ccorr(false), Ccorr(true))));
-
 //////////////////////////////////////////////////////////////////////////////
 // CopyMakeBorder
 
@@ -3939,89 +412,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, CopyMakeBorder, testing::Combine(
     ALL_BORDER_TYPES,
     WHOLE_SUBMAT));
 
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// Integral
-
-PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, cv::Size, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Integral, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_8UC1);
-
-    cv::gpu::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);
-    cv::gpu::integral(loadMat(src, useRoi), dst);
-
-    cv::Mat dst_gold;
-    cv::integral(src, dst_gold, CV_32S);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Integral, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    WHOLE_SUBMAT));
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// Threshold
-
-CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
-#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
-
-PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    int threshOp;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-        threshOp = GET_PARAM(3);
-        useRoi = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Threshold, Accuracy)
-{
-    cv::Mat src = randomMat(size, type);
-    double maxVal = randomDouble(20.0, 127.0);
-    double thresh = randomDouble(0.0, maxVal);
-
-    cv::gpu::GpuMat dst = createMat(src.size(), src.type(), useRoi);
-    cv::gpu::threshold(loadMat(src, useRoi), dst, thresh, maxVal, threshOp);
-
-    cv::Mat dst_gold;
-    cv::threshold(src, dst_gold, thresh, maxVal, threshOp);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Arithm, Threshold, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)),
-    ALL_THRESH_OPS,
-    WHOLE_SUBMAT));
-
 #endif // HAVE_CUDA
diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp
new file mode 100644
index 000000000..3d06b8f9d
--- /dev/null
+++ b/modules/gpuarithm/test/test_element_operations.cpp
@@ -0,0 +1,2503 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+////////////////////////////////////////////////////////////////////////////////
+// Add_Array
+
+PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    int channels;
+    bool useRoi;
+
+    int stype;
+    int dtype;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        stype = CV_MAKE_TYPE(depth.first, channels);
+        dtype = CV_MAKE_TYPE(depth.second, channels);
+    }
+};
+
+GPU_TEST_P(Add_Array, Accuracy)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::gpu::GpuMat(), depth.second);
+
+        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
+        cv::add(mat1, mat2, dst_gold, cv::noArray(), depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Array, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    ALL_CHANNELS,
+    WHOLE_SUBMAT));
+
+PARAM_TEST_CASE(Add_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    bool useRoi;
+
+    int stype;
+    int dtype;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        stype = CV_MAKE_TYPE(depth.first, 1);
+        dtype = CV_MAKE_TYPE(depth.second, 1);
+    }
+};
+
+GPU_TEST_P(Add_Array_Mask, Accuracy)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype);
+    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
+
+        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
+        cv::add(mat1, mat2, dst_gold, mask, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Array_Mask, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Add_Scalar
+
+PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Add_Scalar, WithOutMask)
+{
+    cv::Mat mat = randomMat(size, depth.first);
+    cv::Scalar val = randomScalar(0, 255);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::add(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);
+
+        cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
+        cv::add(mat, val, dst_gold, cv::noArray(), depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+GPU_TEST_P(Add_Scalar, WithMask)
+{
+    cv::Mat mat = randomMat(size, depth.first);
+    cv::Scalar val = randomScalar(0, 255);
+    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
+
+        cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
+        cv::add(mat, val, dst_gold, mask, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Scalar, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Subtract_Array
+
+PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    int channels;
+    bool useRoi;
+
+    int stype;
+    int dtype;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        stype = CV_MAKE_TYPE(depth.first, channels);
+        dtype = CV_MAKE_TYPE(depth.second, channels);
+    }
+};
+
+GPU_TEST_P(Subtract_Array, Accuracy)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::gpu::GpuMat(), depth.second);
+
+        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
+        cv::subtract(mat1, mat2, dst_gold, cv::noArray(), depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Array, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    ALL_CHANNELS,
+    WHOLE_SUBMAT));
+
+PARAM_TEST_CASE(Subtract_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    bool useRoi;
+
+    int stype;
+    int dtype;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        stype = CV_MAKE_TYPE(depth.first, 1);
+        dtype = CV_MAKE_TYPE(depth.second, 1);
+    }
+};
+
+GPU_TEST_P(Subtract_Array_Mask, Accuracy)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype);
+    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
+
+        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
+        cv::subtract(mat1, mat2, dst_gold, mask, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Array_Mask, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Subtract_Scalar
+
+PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Subtract_Scalar, WithOutMask)
+{
+    cv::Mat mat = randomMat(size, depth.first);
+    cv::Scalar val = randomScalar(0, 255);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::subtract(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);
+
+        cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
+        cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+GPU_TEST_P(Subtract_Scalar, WithMask)
+{
+    cv::Mat mat = randomMat(size, depth.first);
+    cv::Scalar val = randomScalar(0, 255);
+    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
+
+        cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
+        cv::subtract(mat, val, dst_gold, mask, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Scalar, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Multiply_Array
+
+PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    int channels;
+    bool useRoi;
+
+    int stype;
+    int dtype;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        stype = CV_MAKE_TYPE(depth.first, channels);
+        dtype = CV_MAKE_TYPE(depth.second, channels);
+    }
+};
+
+GPU_TEST_P(Multiply_Array, WithOutScale)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
+
+        cv::Mat dst_gold;
+        cv::multiply(mat1, mat2, dst_gold, 1, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 0.0);
+    }
+}
+
+GPU_TEST_P(Multiply_Array, WithScale)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype);
+    double scale = randomDouble(0.0, 255.0);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
+
+        cv::Mat dst_gold;
+        cv::multiply(mat1, mat2, dst_gold, scale, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 2.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Array, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    ALL_CHANNELS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Multiply_Array_Special
+
+PARAM_TEST_CASE(Multiply_Array_Special, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Multiply_Array_Special, Case_8UC4x_32FC1)
+{
+    cv::Mat mat1 = randomMat(size, CV_8UC4);
+    cv::Mat mat2 = randomMat(size, CV_32FC1);
+    // Multiply the 4-channel 8-bit input by the 1-channel float input with cv::gpu::multiply.
+    cv::gpu::GpuMat dst = createMat(size, CV_8UC4, useRoi);
+    cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
+
+    cv::Mat h_dst(dst);
+
+    for (int y = 0; y < h_dst.rows; ++y)
+    {
+        const cv::Vec4b* mat1_row = mat1.ptr<cv::Vec4b>(y);
+        const float* mat2_row = mat2.ptr<float>(y);
+        const cv::Vec4b* dst_row = h_dst.ptr<cv::Vec4b>(y);
+
+        for (int x = 0; x < h_dst.cols; ++x)
+        {
+            cv::Vec4b val1 = mat1_row[x];
+            float val2 = mat2_row[x];
+            cv::Vec4b actual = dst_row[x];
+
+            cv::Vec4b gold;
+            // Reference value: each of the four channels multiplied by the same float factor.
+            gold[0] = cv::saturate_cast<uchar>(val1[0] * val2);
+            gold[1] = cv::saturate_cast<uchar>(val1[1] * val2);
+            gold[2] = cv::saturate_cast<uchar>(val1[2] * val2);
+            gold[3] = cv::saturate_cast<uchar>(val1[3] * val2);
+            // Compare all four channels against the reference, allowing a difference of at most 1.
+            ASSERT_LE(std::abs(gold[0] - actual[0]), 1.0);
+            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
+            ASSERT_LE(std::abs(gold[2] - actual[2]), 1.0);
+            ASSERT_LE(std::abs(gold[3] - actual[3]), 1.0);
+        }
+    }
+}
+
+GPU_TEST_P(Multiply_Array_Special, Case_16SC4x_32FC1)
+{
+    cv::Mat mat1 = randomMat(size, CV_16SC4);
+    cv::Mat mat2 = randomMat(size, CV_32FC1);
+    // Multiply the 4-channel 16-bit signed input by the 1-channel float input with cv::gpu::multiply.
+    cv::gpu::GpuMat dst = createMat(size, CV_16SC4, useRoi);
+    cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
+
+    cv::Mat h_dst(dst);
+
+    for (int y = 0; y < h_dst.rows; ++y)
+    {
+        const cv::Vec4s* mat1_row = mat1.ptr<cv::Vec4s>(y);
+        const float* mat2_row = mat2.ptr<float>(y);
+        const cv::Vec4s* dst_row = h_dst.ptr<cv::Vec4s>(y);
+
+        for (int x = 0; x < h_dst.cols; ++x)
+        {
+            cv::Vec4s val1 = mat1_row[x];
+            float val2 = mat2_row[x];
+            cv::Vec4s actual = dst_row[x];
+
+            cv::Vec4s gold;
+            // Reference value: each of the four channels multiplied by the same float factor.
+            gold[0] = cv::saturate_cast<short>(val1[0] * val2);
+            gold[1] = cv::saturate_cast<short>(val1[1] * val2);
+            gold[2] = cv::saturate_cast<short>(val1[2] * val2);
+            gold[3] = cv::saturate_cast<short>(val1[3] * val2);
+            // Compare all four channels against the reference, allowing a difference of at most 1.
+            ASSERT_LE(std::abs(gold[0] - actual[0]), 1.0);
+            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
+            ASSERT_LE(std::abs(gold[2] - actual[2]), 1.0);
+            ASSERT_LE(std::abs(gold[3] - actual[3]), 1.0);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Array_Special, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Multiply_Scalar
+
+PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Multiply_Scalar, WithOutScale)
+{
+    cv::Mat mat = randomMat(size, depth.first);
+    cv::Scalar val = randomScalar(0, 255);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::multiply(loadMat(mat), val, dst, 1, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::gpu::multiply(loadMat(mat, useRoi), val, dst, 1, depth.second);
+
+        cv::Mat dst_gold;
+        cv::multiply(mat, val, dst_gold, 1, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
+    }
+}
+
+
+GPU_TEST_P(Multiply_Scalar, WithScale)
+{
+    cv::Mat mat = randomMat(size, depth.first);
+    cv::Scalar val = randomScalar(0, 255);
+    double scale = randomDouble(0.0, 255.0);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::multiply(loadMat(mat), val, dst, scale, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second);
+
+        cv::Mat dst_gold;
+        cv::multiply(mat, val, dst_gold, scale, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Divide_Array
+
+PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;
+    int channels;
+    bool useRoi;
+
+    int stype;
+    int dtype;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        stype = CV_MAKE_TYPE(depth.first, channels);
+        dtype = CV_MAKE_TYPE(depth.second, channels);
+    }
+};
+
+GPU_TEST_P(Divide_Array, WithOutScale)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
+
+        cv::Mat dst_gold;
+        cv::divide(mat1, mat2, dst_gold, 1, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
+    }
+}
+
+GPU_TEST_P(Divide_Array, WithScale)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);
+    double scale = randomDouble(0.0, 255.0);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
+
+        cv::Mat dst_gold;
+        cv::divide(mat1, mat2, dst_gold, scale, depth.second);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 1.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Array, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    ALL_CHANNELS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Divide_Array_Special
+
+PARAM_TEST_CASE(Divide_Array_Special, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Divide_Array_Special, Case_8UC4x_32FC1)
+{
+    cv::Mat mat1 = randomMat(size, CV_8UC4);
+    cv::Mat mat2 = randomMat(size, CV_32FC1, 1.0, 255.0);
+    // Divide the 4-channel 8-bit input by the 1-channel float input with cv::gpu::divide.
+    cv::gpu::GpuMat dst = createMat(size, CV_8UC4, useRoi);
+    cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
+
+    cv::Mat h_dst(dst);
+
+    for (int y = 0; y < h_dst.rows; ++y)
+    {
+        const cv::Vec4b* mat1_row = mat1.ptr<cv::Vec4b>(y);
+        const float* mat2_row = mat2.ptr<float>(y);
+        const cv::Vec4b* dst_row = h_dst.ptr<cv::Vec4b>(y);
+
+        for (int x = 0; x < h_dst.cols; ++x)
+        {
+            cv::Vec4b val1 = mat1_row[x];
+            float val2 = mat2_row[x];
+            cv::Vec4b actual = dst_row[x];
+
+            cv::Vec4b gold;
+            // Reference value: each of the four channels divided by the same float divisor.
+            gold[0] = cv::saturate_cast<uchar>(val1[0] / val2);
+            gold[1] = cv::saturate_cast<uchar>(val1[1] / val2);
+            gold[2] = cv::saturate_cast<uchar>(val1[2] / val2);
+            gold[3] = cv::saturate_cast<uchar>(val1[3] / val2);
+            // Compare all four channels against the reference, allowing a difference of at most 1.
+            ASSERT_LE(std::abs(gold[0] - actual[0]), 1.0);
+            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
+            ASSERT_LE(std::abs(gold[2] - actual[2]), 1.0);
+            ASSERT_LE(std::abs(gold[3] - actual[3]), 1.0);
+        }
+    }
+}
+
+GPU_TEST_P(Divide_Array_Special, Case_16SC4x_32FC1)
+{
+    cv::Mat mat1 = randomMat(size, CV_16SC4);
+    cv::Mat mat2 = randomMat(size, CV_32FC1, 1.0, 255.0);
+    // Divide the 4-channel 16-bit signed input by the 1-channel float input with cv::gpu::divide.
+    cv::gpu::GpuMat dst = createMat(size, CV_16SC4, useRoi);
+    cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
+
+    cv::Mat h_dst(dst);
+
+    for (int y = 0; y < h_dst.rows; ++y)
+    {
+        const cv::Vec4s* mat1_row = mat1.ptr<cv::Vec4s>(y);
+        const float* mat2_row = mat2.ptr<float>(y);
+        const cv::Vec4s* dst_row = h_dst.ptr<cv::Vec4s>(y);
+
+        for (int x = 0; x < h_dst.cols; ++x)
+        {
+            cv::Vec4s val1 = mat1_row[x];
+            float val2 = mat2_row[x];
+            cv::Vec4s actual = dst_row[x];
+
+            cv::Vec4s gold;
+            // Reference value: each of the four channels divided by the same float divisor.
+            gold[0] = cv::saturate_cast<short>(val1[0] / val2);
+            gold[1] = cv::saturate_cast<short>(val1[1] / val2);
+            gold[2] = cv::saturate_cast<short>(val1[2] / val2);
+            gold[3] = cv::saturate_cast<short>(val1[3] / val2);
+            // Compare all four channels against the reference, allowing a difference of at most 1.
+            ASSERT_LE(std::abs(gold[0] - actual[0]), 1.0);
+            ASSERT_LE(std::abs(gold[1] - actual[1]), 1.0);
+            ASSERT_LE(std::abs(gold[2] - actual[2]), 1.0);
+            ASSERT_LE(std::abs(gold[3] - actual[3]), 1.0);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Array_Special, testing::Combine(
+    ALL_DEVICES,        // the three generators are defined outside this chunk
+    DIFFERENT_SIZES,
+    WHOLE_SUBMAT));     // Combine() runs the tests for every combination of the three
+
+////////////////////////////////////////////////////////////////////////////////
+// Divide_Scalar
+
+PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;   // first: source depth, second: destination depth (as used by the tests)
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test bodies run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Divide_Scalar, WithOutScale)   // matrix / scalar with scale 1, checked against cv::divide
+{
+    cv::Mat mat = randomMat(size, depth.first);
+    cv::Scalar val = randomScalar(1.0, 255.0);
+    // Without native double support, 64F source or destination data is expected to be rejected.
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::divide(loadMat(mat), val, dst, 1, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::gpu::divide(loadMat(mat, useRoi), val, dst, 1, depth.second);
+        // Host reference computed with the same scale and destination depth.
+        cv::Mat dst_gold;
+        cv::divide(mat, val, dst_gold, 1, depth.second);
+        // Tolerance is 1e-4 when either depth is CV_32F or wider, otherwise one unit.
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
+    }
+}
+
+GPU_TEST_P(Divide_Scalar, WithScale)   // matrix / scalar with a random scale factor, checked against cv::divide
+{
+    cv::Mat mat = randomMat(size, depth.first);
+    cv::Scalar val = randomScalar(1.0, 255.0);
+    double scale = randomDouble(0.0, 255.0);
+    // Without native double support, 64F source or destination data is expected to be rejected.
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::divide(loadMat(mat), val, dst, scale, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second);
+        // Host reference computed with the same scale and destination depth.
+        cv::Mat dst_gold;
+        cv::divide(mat, val, dst_gold, scale, depth.second);
+        // Tolerance is 1e-2 when either depth is CV_32F or wider, otherwise one unit.
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 1.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar, testing::Combine(
+    ALL_DEVICES,        // generators are defined outside this chunk; order matches GET_PARAM(0..3)
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Divide_Scalar_Inv
+
+PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth;   // first: source depth, second: destination depth (as used by the test)
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Divide_Scalar_Inv, Accuracy)   // scalar / matrix, checked against the matching cv::divide overload
+{
+    double scale = randomDouble(0.0, 255.0);
+    cv::Mat mat = randomMat(size, depth.first, 1.0, 255.0);
+    // Without native double support, 64F source or destination data is expected to be rejected.
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::divide(scale, loadMat(mat), dst, depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second);
+        // Host reference computed with the same numerator and destination depth.
+        cv::Mat dst_gold;
+        cv::divide(scale, mat, dst_gold, depth.second);
+        // Tolerance is 1e-4 when either depth is CV_32F or wider, otherwise one unit.
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar_Inv, testing::Combine(
+    ALL_DEVICES,        // generators are defined outside this chunk; order matches GET_PARAM(0..3)
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// AbsDiff
+
+PARAM_TEST_CASE(AbsDiff, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // matrix depth shared by the inputs and the output
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test bodies run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(AbsDiff, Array)   // |src1 - src2| on the GPU must equal cv::absdiff exactly
+{
+    cv::Mat src1 = randomMat(size, depth);
+    cv::Mat src2 = randomMat(size, depth);
+    // Without native double support, 64F data is expected to be rejected.
+    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::absdiff(loadMat(src1), loadMat(src2), dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+        cv::gpu::absdiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
+        // Host reference.
+        cv::Mat dst_gold;
+        cv::absdiff(src1, src2, dst_gold);
+        // Zero tolerance: the results must match exactly.
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+}
+
+GPU_TEST_P(AbsDiff, Scalar)   // |src - scalar| on the GPU, checked against cv::absdiff
+{
+    cv::Mat src = randomMat(size, depth);
+    cv::Scalar val = randomScalar(0.0, 255.0);
+    // Without native double support, 64F data is expected to be rejected.
+    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::absdiff(loadMat(src), val, dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+        cv::gpu::absdiff(loadMat(src, useRoi), val, dst);
+        // Host reference.
+        cv::Mat dst_gold;
+        cv::absdiff(src, val, dst_gold);
+        // NOTE(review): "<=" gives CV_32F the loose 1.0 tolerance too; only CV_64F gets 1e-5 - confirm intended.
+        EXPECT_MAT_NEAR(dst_gold, dst, depth <= CV_32F ? 1.0 : 1e-5);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, AbsDiff, testing::Combine(
+    ALL_DEVICES,        // generators are defined outside this chunk; order matches GET_PARAM(0..3)
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Abs
+
+PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // matrix depth shared by the input and the output
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Abs, Accuracy)   // cv::gpu::abs must reproduce cv::abs exactly
+{
+    cv::Mat src = randomMat(size, depth);
+
+    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+    cv::gpu::abs(loadMat(src, useRoi), dst);
+    // Host reference.
+    cv::Mat dst_gold = cv::abs(src);
+    // Zero tolerance: the results must match exactly.
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Abs, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)),   // only these two depths are exercised
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Sqr
+
+PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // matrix depth shared by the input and the output
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Sqr, Accuracy)   // cv::gpu::sqr must equal src multiplied by itself on the host
+{
+    cv::Mat src = randomMat(size, depth, 0, depth == CV_8U ? 16 : 255);   // upper bound 16 for CV_8U (16 * 16 = 256; presumably limits saturation - confirm)
+
+    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+    cv::gpu::sqr(loadMat(src, useRoi), dst);
+    // Host reference: element-wise src * src.
+    cv::Mat dst_gold;
+    cv::multiply(src, src, dst_gold);
+    // Zero tolerance: the results must match exactly.
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sqr, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U),      // depths exercised by this suite
+                    MatDepth(CV_16U),
+                    MatDepth(CV_16S),
+                    MatDepth(CV_32F)),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Sqrt
+
+namespace
+{
+    template <typename T> void sqrtImpl(const cv::Mat& src, cv::Mat& dst)
+    {
+        dst.create(src.size(), src.type());
+        // Per element: widen to float, take the square root, narrow back to T.
+        for (int row = 0; row < src.rows; ++row)
+            for (int col = 0; col < src.cols; ++col)
+            {
+                const float in = static_cast<float>(src.at<T>(row, col));
+                dst.at<T>(row, col) = static_cast<T>(std::sqrt(in));
+            }
+    }
+    // Host reference for cv::gpu::sqrt: picks the sqrtImpl instantiation by depth.
+    void sqrtGold(const cv::Mat& src, cv::Mat& dst)
+    {
+        typedef void (*impl_t)(const cv::Mat&, cv::Mat&);
+        // Slot order follows the depth codes CV_8U .. CV_32F; CV_64F has no entry.
+        const impl_t table[] =
+        {
+            sqrtImpl<uchar>, sqrtImpl<schar>, sqrtImpl<ushort>,
+            sqrtImpl<short>, sqrtImpl<int>, sqrtImpl<float>
+        };
+        table[src.depth()](src, dst);
+    }
+}
+
+PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // matrix depth shared by the input and the output
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Sqrt, Accuracy)   // cv::gpu::sqrt checked against the sqrtGold host reference
+{
+    cv::Mat src = randomMat(size, depth);
+
+    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+    cv::gpu::sqrt(loadMat(src, useRoi), dst);
+    // Host reference.
+    cv::Mat dst_gold;
+    sqrtGold(src, dst_gold);
+    // Depths below CV_32F may differ by one unit; CV_32F must agree to 1e-5.
+    EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sqrt, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U),      // depths exercised by this suite
+                    MatDepth(CV_16U),
+                    MatDepth(CV_16S),
+                    MatDepth(CV_32F)),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Log
+
+namespace
+{
+    template <typename T> void logImpl(const cv::Mat& src, cv::Mat& dst)
+    {
+        dst.create(src.size(), src.type());
+        // Per element: widen to float, take the natural logarithm, narrow back to T.
+        for (int row = 0; row < src.rows; ++row)
+            for (int col = 0; col < src.cols; ++col)
+            {
+                const float in = static_cast<float>(src.at<T>(row, col));
+                dst.at<T>(row, col) = static_cast<T>(std::log(in));
+            }
+    }
+    // Host reference for cv::gpu::log: picks the logImpl instantiation by depth.
+    void logGold(const cv::Mat& src, cv::Mat& dst)
+    {
+        typedef void (*impl_t)(const cv::Mat&, cv::Mat&);
+        // Slot order follows the depth codes CV_8U .. CV_32F; CV_64F has no entry.
+        const impl_t table[] =
+        {
+            logImpl<uchar>, logImpl<schar>, logImpl<ushort>,
+            logImpl<short>, logImpl<int>, logImpl<float>
+        };
+        table[src.depth()](src, dst);
+    }
+}
+
+PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // matrix depth shared by the input and the output
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Log, Accuracy)   // cv::gpu::log checked against the logGold host reference
+{
+    cv::Mat src = randomMat(size, depth, 1.0, 255.0);   // bounds 1.0 and 255.0 (presumably keeps log() away from zero - confirm)
+
+    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+    cv::gpu::log(loadMat(src, useRoi), dst);
+    // Host reference.
+    cv::Mat dst_gold;
+    logGold(src, dst_gold);
+    // Depths below CV_32F may differ by one unit; CV_32F must agree to 1e-6.
+    EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-6);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Log, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U),      // depths exercised by this suite
+                    MatDepth(CV_16U),
+                    MatDepth(CV_16S),
+                    MatDepth(CV_32F)),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Exp
+
+namespace
+{
+    template <typename T> void expImpl(const cv::Mat& src, cv::Mat& dst)
+    {
+        dst.create(src.size(), src.type());
+        // Integer depths: exp() in float, truncate to int, then saturate into T.
+        for (int row = 0; row < src.rows; ++row)
+            for (int col = 0; col < src.cols; ++col)
+            {
+                const int truncated = static_cast<int>(std::exp(static_cast<float>(src.at<T>(row, col))));
+                dst.at<T>(row, col) = cv::saturate_cast<T>(truncated);
+            }
+    }
+    void expImpl_float(const cv::Mat& src, cv::Mat& dst)
+    {
+        dst.create(src.size(), src.type());
+        // Float depth: the exp() result is stored as is, without truncation.
+        for (int row = 0; row < src.rows; ++row)
+            for (int col = 0; col < src.cols; ++col)
+            {
+                const float in = src.at<float>(row, col);
+                dst.at<float>(row, col) = std::exp(in);
+            }
+    }
+    // Host reference for cv::gpu::exp: one slot per depth code CV_8U .. CV_32F.
+    void expGold(const cv::Mat& src, cv::Mat& dst)
+    {
+        typedef void (*impl_t)(const cv::Mat&, cv::Mat&);
+        const impl_t table[] =
+        {
+            expImpl<uchar>, expImpl<schar>, expImpl<ushort>,
+            expImpl<short>, expImpl<int>, expImpl_float
+        };
+        table[src.depth()](src, dst);
+    }
+}
+
+PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // matrix depth shared by the input and the output
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Exp, Accuracy)   // cv::gpu::exp checked against the expGold host reference
+{
+    cv::Mat src = randomMat(size, depth, 0.0, 10.0);   // bounds 0.0 and 10.0 (presumably keeps exp() small - confirm)
+
+    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+    cv::gpu::exp(loadMat(src, useRoi), dst);
+    // Host reference.
+    cv::Mat dst_gold;
+    expGold(src, dst_gold);
+    // Depths below CV_32F may differ by one unit; CV_32F must agree to 1e-2.
+    EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-2);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Exp, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U),      // depths exercised by this suite
+                    MatDepth(CV_16U),
+                    MatDepth(CV_16S),
+                    MatDepth(CV_32F)),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Compare_Array
+
+CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)   // parameter type for the compare fixtures; CV_ENUM is defined outside this chunk
+#define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
+
+PARAM_TEST_CASE(Compare_Array, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // depth of both input matrices
+    int cmp_code;   // comparison operation, taken from the CmpCode parameter
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        cmp_code = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Compare_Array, Accuracy)   // element-wise matrix/matrix comparison must equal cv::compare exactly
+{
+    cv::Mat src1 = randomMat(size, depth);
+    cv::Mat src2 = randomMat(size, depth);
+    // Without native double support, 64F data is expected to be rejected.
+    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::compare(loadMat(src1), loadMat(src2), dst, cmp_code);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, CV_8UC1, useRoi);   // the result mask is 8-bit single-channel
+        cv::gpu::compare(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, cmp_code);
+        // Host reference.
+        cv::Mat dst_gold;
+        cv::compare(src1, src2, dst_gold, cmp_code);
+        // Zero tolerance: the masks must match exactly.
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Compare_Array, testing::Combine(
+    ALL_DEVICES,        // generator order matches GET_PARAM(0..4) in the fixture
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    ALL_CMP_CODES,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Compare_Scalar
+
+namespace
+{
+    template <template <typename> class Op, typename T>
+    void compareScalarImpl(const cv::Mat& src, cv::Scalar sc, cv::Mat& dst)
+    {
+        Op<T> cmp;
+        const int channels = src.channels();
+        const int rowLen = src.cols * channels;
+
+        dst.create(src.size(), CV_MAKE_TYPE(CV_8U, channels));
+
+        for (int row = 0; row < src.rows; ++row)
+        {
+            // Walk the interleaved row; element i belongs to channel i % channels.
+            for (int i = 0; i < rowLen; ++i)
+            {
+                const T lhs = src.at<T>(row, i);
+                const T rhs = cv::saturate_cast<T>(sc.val[i % channels]);
+                const int hit = static_cast<int>(cmp(lhs, rhs));
+                dst.at<uchar>(row, i) = static_cast<uchar>(hit * 255);
+            }
+        }
+    }
+
+    // Host reference for matrix/scalar compare: rows are indexed by depth, columns by comparison code.
+    void compareScalarGold(const cv::Mat& src, cv::Scalar sc, cv::Mat& dst, int cmpop)
+    {
+        typedef void (*func_t)(const cv::Mat& src, cv::Scalar sc, cv::Mat& dst);
+        static const func_t funcs[7][6] =
+        {
+            {compareScalarImpl<std::equal_to, unsigned char> , compareScalarImpl<std::greater, unsigned char> , compareScalarImpl<std::greater_equal, unsigned char> , compareScalarImpl<std::less, unsigned char> , compareScalarImpl<std::less_equal, unsigned char> , compareScalarImpl<std::not_equal_to, unsigned char> },
+            {compareScalarImpl<std::equal_to, signed char>   , compareScalarImpl<std::greater, signed char>   , compareScalarImpl<std::greater_equal, signed char>   , compareScalarImpl<std::less, signed char>   , compareScalarImpl<std::less_equal, signed char>   , compareScalarImpl<std::not_equal_to, signed char>   },
+            {compareScalarImpl<std::equal_to, unsigned short>, compareScalarImpl<std::greater, unsigned short>, compareScalarImpl<std::greater_equal, unsigned short>, compareScalarImpl<std::less, unsigned short>, compareScalarImpl<std::less_equal, unsigned short>, compareScalarImpl<std::not_equal_to, unsigned short>},
+            {compareScalarImpl<std::equal_to, short>         , compareScalarImpl<std::greater, short>         , compareScalarImpl<std::greater_equal, short>         , compareScalarImpl<std::less, short>         , compareScalarImpl<std::less_equal, short>         , compareScalarImpl<std::not_equal_to, short>         },
+            {compareScalarImpl<std::equal_to, int>           , compareScalarImpl<std::greater, int>           , compareScalarImpl<std::greater_equal, int>           , compareScalarImpl<std::less, int>           , compareScalarImpl<std::less_equal, int>           , compareScalarImpl<std::not_equal_to, int>           },
+            {compareScalarImpl<std::equal_to, float>         , compareScalarImpl<std::greater, float>         , compareScalarImpl<std::greater_equal, float>         , compareScalarImpl<std::less, float>         , compareScalarImpl<std::less_equal, float>         , compareScalarImpl<std::not_equal_to, float>         },
+            {compareScalarImpl<std::equal_to, double>        , compareScalarImpl<std::greater, double>        , compareScalarImpl<std::greater_equal, double>        , compareScalarImpl<std::less, double>        , compareScalarImpl<std::less_equal, double>        , compareScalarImpl<std::not_equal_to, double>        }
+        };
+
+        funcs[src.depth()][cmpop](src, sc, dst);
+    }
+}
+
+PARAM_TEST_CASE(Compare_Scalar, cv::gpu::DeviceInfo, cv::Size, MatType, CmpCode, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;       // full matrix type (depth and channel count) of the source
+    int cmp_code;   // comparison operation, taken from the CmpCode parameter
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        cmp_code = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Compare_Scalar, Accuracy)   // matrix/scalar comparison checked against compareScalarGold
+{
+    cv::Mat src = randomMat(size, type);
+    cv::Scalar sc = randomScalar(0.0, 255.0);
+    // For integer depths the scalar is rounded to whole numbers before it is used.
+    if (src.depth() < CV_32F)
+    {
+        sc.val[0] = cvRound(sc.val[0]);
+        sc.val[1] = cvRound(sc.val[1]);
+        sc.val[2] = cvRound(sc.val[2]);
+        sc.val[3] = cvRound(sc.val[3]);
+    }
+    // Without native double support, 64F data is expected to be rejected.
+    if (src.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::compare(loadMat(src), sc, dst, cmp_code);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, CV_MAKE_TYPE(CV_8U, src.channels()), useRoi);
+        // The result mask is 8-bit with as many channels as the source.
+        cv::gpu::compare(loadMat(src, useRoi), sc, dst, cmp_code);
+        // Host reference.
+        cv::Mat dst_gold;
+        compareScalarGold(src, sc, dst_gold, cmp_code);
+        // Zero tolerance: the masks must match exactly.
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Compare_Scalar, testing::Combine(
+    ALL_DEVICES,        // generator order matches GET_PARAM(0..4) in the fixture
+    DIFFERENT_SIZES,
+    TYPES(CV_8U, CV_64F, 1, 4),
+    ALL_CMP_CODES,
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////////////////////////////
+// Bitwise_Array
+
+PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    // Random operands shared by all tests of this fixture; filled in SetUp().
+    cv::Mat src1;
+    cv::Mat src2;
+    // Unpacks the parameter tuple, selects the device and generates both operands.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+        // Bounds passed to randomMat: 0 and the largest int.
+        src1 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
+        src2 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
+    }
+};
+
+GPU_TEST_P(Bitwise_Array, Not)   // GPU bitwise NOT must equal the host ~ operator exactly
+{
+    cv::gpu::GpuMat dst;
+    cv::gpu::bitwise_not(loadMat(src1), dst);
+    // Host reference.
+    cv::Mat dst_gold = ~src1;
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+GPU_TEST_P(Bitwise_Array, Or)   // GPU bitwise OR of two matrices must equal the host | operator exactly
+{
+    cv::gpu::GpuMat dst;
+    cv::gpu::bitwise_or(loadMat(src1), loadMat(src2), dst);
+    // Host reference.
+    cv::Mat dst_gold = src1 | src2;
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+GPU_TEST_P(Bitwise_Array, And)   // GPU bitwise AND of two matrices must equal the host & operator exactly
+{
+    cv::gpu::GpuMat dst;
+    cv::gpu::bitwise_and(loadMat(src1), loadMat(src2), dst);
+    // Host reference.
+    cv::Mat dst_gold = src1 & src2;
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+GPU_TEST_P(Bitwise_Array, Xor)   // GPU bitwise XOR of two matrices must equal the host ^ operator exactly
+{
+    cv::gpu::GpuMat dst;
+    cv::gpu::bitwise_xor(loadMat(src1), loadMat(src2), dst);
+    // Host reference.
+    cv::Mat dst_gold = src1 ^ src2;
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Bitwise_Array, testing::Combine(
+    ALL_DEVICES,        // generator order matches GET_PARAM(0..2) in the fixture
+    DIFFERENT_SIZES,
+    TYPES(CV_8U, CV_32S, 1, 4)));
+
+//////////////////////////////////////////////////////////////////////////////
+// Bitwise_Scalar
+
+PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    int channels;
+    // Random matrix and scalar operands shared by all tests of this fixture; filled in SetUp().
+    cv::Mat src;
+    cv::Scalar val;
+    // Unpacks the parameter tuple, selects the device and generates both operands.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+        // The scalar passes through Scalar_<int> first, so val ends up holding integer values.
+        src = randomMat(size, CV_MAKE_TYPE(depth, channels));
+        cv::Scalar_<int> ival = randomScalar(0.0, std::numeric_limits<int>::max());
+        val = ival;
+    }
+};
+
+GPU_TEST_P(Bitwise_Scalar, Or)   // GPU matrix-OR-scalar must equal cv::bitwise_or exactly
+{
+    cv::gpu::GpuMat dst;
+    cv::gpu::bitwise_or(loadMat(src), val, dst);
+    // Host reference.
+    cv::Mat dst_gold;
+    cv::bitwise_or(src, val, dst_gold);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+GPU_TEST_P(Bitwise_Scalar, And)   // GPU matrix-AND-scalar must equal cv::bitwise_and exactly
+{
+    cv::gpu::GpuMat dst;
+    cv::gpu::bitwise_and(loadMat(src), val, dst);
+    // Host reference.
+    cv::Mat dst_gold;
+    cv::bitwise_and(src, val, dst_gold);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+GPU_TEST_P(Bitwise_Scalar, Xor)   // GPU matrix-XOR-scalar must equal cv::bitwise_xor exactly
+{
+    cv::gpu::GpuMat dst;
+    cv::gpu::bitwise_xor(loadMat(src), val, dst);
+    // Host reference.
+    cv::Mat dst_gold;
+    cv::bitwise_xor(src, val, dst_gold);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Bitwise_Scalar, testing::Combine(
+    ALL_DEVICES,        // generator order matches GET_PARAM(0..3) in the fixture
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)),
+    IMAGE_CHANNELS));
+
+//////////////////////////////////////////////////////////////////////////////
+// RShift
+
+namespace
+{
+    template <typename T> void rhiftImpl(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
+    {
+        const int channels = src.channels();
+        const int rowLen = src.cols * channels;
+
+        dst.create(src.size(), src.type());
+
+        for (int row = 0; row < src.rows; ++row)
+        {
+            // Walk the interleaved row; element i is shifted by its channel's amount.
+            for (int i = 0; i < rowLen; ++i)
+                dst.at<T>(row, i) = src.at<T>(row, i) >> val.val[i % channels];
+        }
+    }
+
+    // Host reference for cv::gpu::rshift: one slot per depth code CV_8U .. CV_32S.
+    void rhiftGold(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
+    {
+        typedef void (*impl_t)(const cv::Mat&, cv::Scalar_<int>, cv::Mat&);
+
+        const impl_t table[] =
+        {
+            rhiftImpl<uchar>, rhiftImpl<schar>, rhiftImpl<ushort>, rhiftImpl<short>, rhiftImpl<int>
+        };
+
+        table[src.depth()](src, val, dst);
+    }
+}
+
+PARAM_TEST_CASE(RShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // combined with channels into the matrix type by the test body
+    int channels;
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(RShift, Accuracy)   // per-channel right shift checked against the rhiftGold host reference
+{
+    int type = CV_MAKE_TYPE(depth, channels);
+    cv::Mat src = randomMat(size, type);
+    cv::Scalar_<int> val = randomScalar(0.0, 8.0);   // per-channel shift amounts, converted to int
+
+    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::gpu::rshift(loadMat(src, useRoi), val, dst);
+    // Host reference.
+    cv::Mat dst_gold;
+    rhiftGold(src, val, dst_gold);
+    // Zero tolerance: the results must match exactly.
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, RShift, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U),      // integer depths only, signed ones included
+                    MatDepth(CV_8S),
+                    MatDepth(CV_16U),
+                    MatDepth(CV_16S),
+                    MatDepth(CV_32S)),
+    IMAGE_CHANNELS,
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////////////////////////////
+// LShift
+
+namespace
+{
+    template <typename T> void lhiftImpl(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
+    {
+        const int channels = src.channels();
+        const int rowLen = src.cols * channels;
+
+        dst.create(src.size(), src.type());
+
+        for (int row = 0; row < src.rows; ++row)
+        {
+            // Walk the interleaved row; element i is shifted by its channel's amount.
+            for (int i = 0; i < rowLen; ++i)
+                dst.at<T>(row, i) = src.at<T>(row, i) << val.val[i % channels];
+        }
+    }
+
+    // Host reference for cv::gpu::lshift: one slot per depth code CV_8U .. CV_32S.
+    void lhiftGold(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
+    {
+        typedef void (*impl_t)(const cv::Mat&, cv::Scalar_<int>, cv::Mat&);
+
+        const impl_t table[] =
+        {
+            lhiftImpl<uchar>, lhiftImpl<schar>, lhiftImpl<ushort>, lhiftImpl<short>, lhiftImpl<int>
+        };
+
+        table[src.depth()](src, val, dst);
+    }
+}
+
+PARAM_TEST_CASE(LShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // combined with channels into the matrix type by the test body
+    int channels;
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+        // The GPU calls in the test body run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(LShift, Accuracy)   // per-channel left shift checked against the lhiftGold host reference
+{
+    int type = CV_MAKE_TYPE(depth, channels);
+    cv::Mat src = randomMat(size, type);
+    cv::Scalar_<int> val = randomScalar(0.0, 8.0);   // per-channel shift amounts, converted to int
+
+    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::gpu::lshift(loadMat(src, useRoi), val, dst);
+    // Host reference.
+    cv::Mat dst_gold;
+    lhiftGold(src, val, dst_gold);
+    // Zero tolerance: the results must match exactly.
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, LShift, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)),   // no CV_8S / CV_16S here, unlike RShift
+    IMAGE_CHANNELS,
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////////////////////////////
+// Min
+
+PARAM_TEST_CASE(Min, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // matrix depth shared by the inputs and the output
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test bodies run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Min, Array)   // element-wise minimum of two matrices must equal cv::min exactly
+{
+    cv::Mat src1 = randomMat(size, depth);
+    cv::Mat src2 = randomMat(size, depth);
+    // Without native double support, 64F data is expected to be rejected.
+    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::min(loadMat(src1), loadMat(src2), dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+        cv::gpu::min(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
+        // Host reference.
+        cv::Mat dst_gold = cv::min(src1, src2);
+        // Zero tolerance: the results must match exactly.
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+}
+
+GPU_TEST_P(Min, Scalar)   // element-wise minimum of a matrix and a scalar must equal cv::min exactly
+{
+    cv::Mat src = randomMat(size, depth);
+    double val = randomDouble(0.0, 255.0);
+    // Without native double support, 64F data is expected to be rejected.
+    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::min(loadMat(src), val, dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+        cv::gpu::min(loadMat(src, useRoi), val, dst);
+        // Host reference.
+        cv::Mat dst_gold = cv::min(src, val);
+        // Zero tolerance: the results must match exactly.
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Min, testing::Combine(
+    ALL_DEVICES,        // generators are defined outside this chunk; order matches GET_PARAM(0..3)
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////////////////////////////
+// Max
+
+PARAM_TEST_CASE(Max, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;      // matrix depth shared by the inputs and the output
+    bool useRoi;
+    // Unpacks the parameter tuple into the members above and selects the device.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // The GPU calls in the test bodies run on the device under test.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Max, Array)   // element-wise maximum of two matrices must equal cv::max exactly
+{
+    cv::Mat src1 = randomMat(size, depth);
+    cv::Mat src2 = randomMat(size, depth);
+    // Without native double support, 64F data is expected to be rejected.
+    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        try   // note: no assertion fires if the call completes without throwing
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::max(loadMat(src1), loadMat(src2), dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+        cv::gpu::max(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
+        // Host reference.
+        cv::Mat dst_gold = cv::max(src1, src2);
+        // Zero tolerance: the results must match exactly.
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+}
+
+GPU_TEST_P(Max, Scalar)
+{
+    cv::Mat h_src = randomMat(size, depth);
+    double scalar = randomDouble(0.0, 255.0);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        // Supported depth: the device result must equal the CPU maximum exactly.
+        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+        cv::gpu::max(loadMat(h_src, useRoi), scalar, dst);
+        cv::Mat dst_gold = cv::max(h_src, scalar);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::max(loadMat(h_src), scalar, dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Max, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Pow
+
+PARAM_TEST_CASE(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    bool useRoi;
+    // Copies the four test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Pow, Accuracy)
+{
+    cv::Mat base = randomMat(size, depth, 0.0, 10.0);
+    double power = randomDouble(2.0, 4.0);
+
+    // Depths below CV_32F get a whole-number exponent (the fraction is truncated).
+    if (base.depth() < CV_32F)
+        power = static_cast<int>(power);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
+        cv::gpu::pow(loadMat(base, useRoi), power, dst);
+
+        cv::Mat dst_gold;
+        cv::pow(base, power, dst_gold);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 0.0 : 1e-1);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::pow(loadMat(base), power, dst);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Pow, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////////////////////////////
+// AddWeighted
+
+PARAM_TEST_CASE(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth1;
+    int depth2;
+    int dst_depth;
+    bool useRoi;
+    // Copies the six test parameters (two source depths, one destination depth) and calls cv::gpu::setDevice.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth1 = GET_PARAM(2);
+        depth2 = GET_PARAM(3);
+        dst_depth = GET_PARAM(4);
+        useRoi = GET_PARAM(5);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(AddWeighted, Accuracy)
+{
+    cv::Mat lhs = randomMat(size, depth1);
+    cv::Mat rhs = randomMat(size, depth2);
+    double alpha = randomDouble(-10.0, 10.0);
+    double beta = randomDouble(-10.0, 10.0);
+    double gamma = randomDouble(-10.0, 10.0);
+    const bool usesDouble = depth1 == CV_64F || depth2 == CV_64F || dst_depth == CV_64F; // any 64-bit float operand or destination
+    if (!usesDouble || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        cv::gpu::GpuMat dst = createMat(size, dst_depth, useRoi);
+        cv::gpu::addWeighted(loadMat(lhs, useRoi), alpha, loadMat(rhs, useRoi), beta, gamma, dst, dst_depth);
+
+        cv::Mat dst_gold;
+        cv::addWeighted(lhs, alpha, rhs, beta, gamma, dst_gold, dst_depth);
+
+        EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-3);
+    }
+    else // CV_64F involved on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::addWeighted(loadMat(lhs), alpha, loadMat(rhs), beta, gamma, dst, dst_depth);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, AddWeighted, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    ALL_DEPTH,
+    ALL_DEPTH,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Magnitude
+
+PARAM_TEST_CASE(Magnitude, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool useRoi;
+    // Copies the three test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Magnitude, NPP)
+{
+    cv::Mat xy = randomMat(size, CV_32FC2);
+
+    // Reference: split the two channels and take their magnitude on the CPU.
+    cv::Mat planes[2];
+    cv::split(xy, planes);
+    cv::Mat dst_gold;
+    cv::magnitude(planes[0], planes[1], dst_gold);
+
+    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::magnitude(loadMat(xy, useRoi), dst);
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
+}
+
+GPU_TEST_P(Magnitude, Sqr_NPP)
+{
+    cv::Mat xy = randomMat(size, CV_32FC2);
+
+    // Reference: square the CPU magnitude of the two split channels.
+    cv::Mat planes[2];
+    cv::split(xy, planes);
+    cv::Mat dst_gold;
+    cv::magnitude(planes[0], planes[1], dst_gold);
+    cv::multiply(dst_gold, dst_gold, dst_gold);
+
+    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::magnitudeSqr(loadMat(xy, useRoi), dst);
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-1);
+}
+
+GPU_TEST_P(Magnitude, Accuracy)
+{
+    cv::Mat xs = randomMat(size, CV_32FC1);
+    cv::Mat ys = randomMat(size, CV_32FC1);
+
+    // The CPU reference is computed before the device call.
+    cv::Mat dst_gold;
+    cv::magnitude(xs, ys, dst_gold);
+
+    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::magnitude(loadMat(xs, useRoi), loadMat(ys, useRoi), dst);
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
+}
+
+GPU_TEST_P(Magnitude, Sqr_Accuracy)
+{
+    cv::Mat xs = randomMat(size, CV_32FC1);
+    cv::Mat ys = randomMat(size, CV_32FC1);
+
+    // Reference: the CPU magnitude multiplied by itself element-wise.
+    cv::Mat dst_gold;
+    cv::magnitude(xs, ys, dst_gold);
+    cv::multiply(dst_gold, dst_gold, dst_gold);
+
+    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::magnitudeSqr(loadMat(xs, useRoi), loadMat(ys, useRoi), dst);
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-1);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Magnitude, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// Phase
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(AngleInDegrees, bool) // bool-valued parameter type listed in the Phase, CartToPolar and PolarToCart fixtures
+}
+
+PARAM_TEST_CASE(Phase, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool angleInDegrees;
+    bool useRoi;
+    // Copies the four test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        angleInDegrees = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Phase, Accuracy)
+{
+    cv::Mat xs = randomMat(size, CV_32FC1);
+    cv::Mat ys = randomMat(size, CV_32FC1);
+
+    // The CPU reference is computed before the device call; both receive the same angleInDegrees flag.
+    cv::Mat dst_gold;
+    cv::phase(xs, ys, dst_gold, angleInDegrees);
+
+    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::phase(loadMat(xs, useRoi), loadMat(ys, useRoi), dst, angleInDegrees);
+    EXPECT_MAT_NEAR(dst_gold, dst, angleInDegrees ? 1e-2 : 1e-3);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Phase, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// CartToPolar
+
+PARAM_TEST_CASE(CartToPolar, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool angleInDegrees;
+    bool useRoi;
+    // Copies the four test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        angleInDegrees = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(CartToPolar, Accuracy)
+{
+    cv::Mat xs = randomMat(size, CV_32FC1);
+    cv::Mat ys = randomMat(size, CV_32FC1);
+
+    // The CPU reference (magnitude and angle) is computed before the device call.
+    cv::Mat mag_gold;
+    cv::Mat angle_gold;
+    cv::cartToPolar(xs, ys, mag_gold, angle_gold, angleInDegrees);
+
+    cv::gpu::GpuMat mag = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::GpuMat angle = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::cartToPolar(loadMat(xs, useRoi), loadMat(ys, useRoi), mag, angle, angleInDegrees);
+    EXPECT_MAT_NEAR(mag_gold, mag, 1e-4);
+    EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, CartToPolar, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// PolarToCart
+
+PARAM_TEST_CASE(PolarToCart, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool angleInDegrees;
+    bool useRoi;
+    // Copies the four test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        angleInDegrees = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(PolarToCart, Accuracy)
+{
+    cv::Mat radius = randomMat(size, CV_32FC1);
+    cv::Mat theta = randomMat(size, CV_32FC1);
+
+    // The CPU reference (x and y) is computed before the device call.
+    cv::Mat x_gold;
+    cv::Mat y_gold;
+    cv::polarToCart(radius, theta, x_gold, y_gold, angleInDegrees);
+
+    cv::gpu::GpuMat x = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::GpuMat y = createMat(size, CV_32FC1, useRoi);
+    cv::gpu::polarToCart(loadMat(radius, useRoi), loadMat(theta, useRoi), x, y, angleInDegrees);
+    EXPECT_MAT_NEAR(x_gold, x, 1e-4);
+    EXPECT_MAT_NEAR(y_gold, y, 1e-4);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, PolarToCart, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
+    WHOLE_SUBMAT));
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// Threshold
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
+
+PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    int threshOp;
+    bool useRoi;
+    // Copies the five test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        threshOp = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Threshold, Accuracy)
+{
+    cv::Mat image = randomMat(size, type);
+    double maxVal = randomDouble(20.0, 127.0);
+    double thresh = randomDouble(0.0, maxVal);
+
+    // The CPU reference is computed before the device call; results are compared with zero tolerance.
+    cv::Mat dst_gold;
+    cv::threshold(image, dst_gold, thresh, maxVal, threshOp);
+
+    cv::gpu::GpuMat dst = createMat(image.size(), image.type(), useRoi);
+    cv::gpu::threshold(loadMat(image, useRoi), dst, thresh, maxVal, threshOp);
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Threshold, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)),
+    ALL_THRESH_OPS,
+    WHOLE_SUBMAT));
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuarithm/test/test_reductions.cpp b/modules/gpuarithm/test/test_reductions.cpp
new file mode 100644
index 000000000..cd25ba72f
--- /dev/null
+++ b/modules/gpuarithm/test/test_reductions.cpp
@@ -0,0 +1,819 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+////////////////////////////////////////////////////////////////////////////////
+// Norm
+
+PARAM_TEST_CASE(Norm, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    int normCode;
+    bool useRoi;
+    // Copies the five test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        normCode = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Norm, Accuracy)
+{
+    cv::Mat h_src = randomMat(size, depth);
+    cv::Mat h_mask = randomMat(size, CV_8UC1, 0, 2);
+
+    // CPU reference first, then the masked device norm with d_buf passed as its buffer argument.
+    double val_gold = cv::norm(h_src, normCode, h_mask);
+
+    cv::gpu::GpuMat d_buf;
+    double val = cv::gpu::norm(loadMat(h_src, useRoi), normCode, loadMat(h_mask, useRoi), d_buf);
+    EXPECT_NEAR(val_gold, val, depth < CV_32F ? 0.0 : 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Norm, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U),
+                    MatDepth(CV_8S),
+                    MatDepth(CV_16U),
+                    MatDepth(CV_16S),
+                    MatDepth(CV_32S),
+                    MatDepth(CV_32F)),
+    testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// NormDiff
+
+PARAM_TEST_CASE(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormCode, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int normCode;
+    bool useRoi;
+    // Copies the four test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        normCode = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(NormDiff, Accuracy)
+{
+    cv::Mat lhs = randomMat(size, CV_8UC1);
+    cv::Mat rhs = randomMat(size, CV_8UC1);
+
+    // CPU reference first; the two norms are compared with zero tolerance.
+    double val_gold = cv::norm(lhs, rhs, normCode);
+
+    double val = cv::gpu::norm(loadMat(lhs, useRoi), loadMat(rhs, useRoi), normCode);
+    EXPECT_NEAR(val_gold, val, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, NormDiff, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)),
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////////////////////////////
+// Sum
+
+namespace
+{
+    template <typename T>
+    cv::Scalar absSumImpl(const cv::Mat& src)
+    {
+        // Per-channel sum of absolute values, accumulated into a cv::Scalar.
+        const int cn = src.channels();
+        cv::Scalar sum = cv::Scalar::all(0);
+
+        for (int y = 0; y < src.rows; ++y)
+        {
+            const T* row = src.ptr<T>(y);
+            for (int x = 0; x < src.cols; ++x)
+            {
+                for (int c = 0; c < cn; ++c)
+                    sum[c] += std::abs(row[x * cn + c]);
+            }
+        }
+        return sum;
+    }
+
+    cv::Scalar absSumGold(const cv::Mat& src)
+    {
+        // Dispatch table indexed by src.depth(); entries are ordered uchar .. double.
+        typedef cv::Scalar (*impl_t)(const cv::Mat&);
+        static const impl_t table[] =
+        {
+            absSumImpl<uchar>,
+            absSumImpl<schar>,
+            absSumImpl<ushort>,
+            absSumImpl<short>,
+            absSumImpl<int>,
+            absSumImpl<float>,
+            absSumImpl<double>
+        };
+
+        return table[src.depth()](src);
+    }
+
+    template <typename T>
+    cv::Scalar sqrSumImpl(const cv::Mat& src)
+    {
+        // Per-channel sum of squares; each product is formed from a value of type T.
+        const int cn = src.channels();
+        cv::Scalar sum = cv::Scalar::all(0);
+
+        for (int y = 0; y < src.rows; ++y)
+        {
+            const T* row = src.ptr<T>(y);
+            for (int x = 0; x < src.cols; ++x)
+            {
+                for (int c = 0; c < cn; ++c)
+                {
+                    const T elem = row[x * cn + c];
+                    sum[c] += elem * elem;
+                }
+            }
+        }
+        return sum;
+    }
+
+    cv::Scalar sqrSumGold(const cv::Mat& src)
+    {
+        // Dispatch table indexed by src.depth(); entries are ordered uchar .. double.
+        typedef cv::Scalar (*impl_t)(const cv::Mat&);
+        static const impl_t table[] =
+        {
+            sqrSumImpl<uchar>,
+            sqrSumImpl<schar>,
+            sqrSumImpl<ushort>,
+            sqrSumImpl<short>,
+            sqrSumImpl<int>,
+            sqrSumImpl<float>,
+            sqrSumImpl<double>
+        };
+
+        return table[src.depth()](src);
+    }
+}
+
+PARAM_TEST_CASE(Sum, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    bool useRoi;
+    // Input matrix read by the Simple, Abs and Sqr tests; it is generated in SetUp.
+    cv::Mat src;
+    // Copies the four test parameters, calls cv::gpu::setDevice, then creates the random input.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        src = randomMat(size, type, -128.0, 128.0); // random values requested with bounds -128.0 and 128.0
+    }
+};
+
+GPU_TEST_P(Sum, Simple)
+{
+    // CPU reference first; both sides sum the fixture's src matrix.
+    cv::Scalar val_gold = cv::sum(src);
+
+    cv::Scalar val = cv::gpu::sum(loadMat(src, useRoi));
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
+GPU_TEST_P(Sum, Abs)
+{
+    // CPU reference (absSumGold) first; both sides use the fixture's src matrix.
+    cv::Scalar val_gold = absSumGold(src);
+
+    cv::Scalar val = cv::gpu::absSum(loadMat(src, useRoi));
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
+GPU_TEST_P(Sum, Sqr)
+{
+    // CPU reference (sqrSumGold) first; both sides use the fixture's src matrix.
+    cv::Scalar val_gold = sqrSumGold(src);
+
+    cv::Scalar val = cv::gpu::sqrSum(loadMat(src, useRoi));
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sum, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    TYPES(CV_8U, CV_64F, 1, 4),
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// MinMax
+
+PARAM_TEST_CASE(MinMax, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    bool useRoi;
+    // Copies the four test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MinMax, WithoutMask)
+{
+    cv::Mat h_src = randomMat(size, depth);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        double minVal, maxVal;
+        cv::gpu::minMax(loadMat(h_src, useRoi), &minVal, &maxVal);
+        // Reference extremes come from the minMaxLocGold helper.
+        double minVal_gold, maxVal_gold;
+        minMaxLocGold(h_src, &minVal_gold, &maxVal_gold);
+
+        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
+        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            double minVal, maxVal;
+            cv::gpu::minMax(loadMat(h_src), &minVal, &maxVal);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+GPU_TEST_P(MinMax, WithMask)
+{
+    cv::Mat h_src = randomMat(size, depth);
+    cv::Mat h_mask = randomMat(size, CV_8UC1, 0.0, 2.0);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        double minVal, maxVal;
+        cv::gpu::minMax(loadMat(h_src, useRoi), &minVal, &maxVal, loadMat(h_mask, useRoi));
+        // Reference extremes come from the minMaxLocGold helper, given the same mask.
+        double minVal_gold, maxVal_gold;
+        minMaxLocGold(h_src, &minVal_gold, &maxVal_gold, 0, 0, h_mask);
+
+        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
+        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            double minVal, maxVal;
+            cv::gpu::minMax(loadMat(h_src), &minVal, &maxVal, loadMat(h_mask));
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+GPU_TEST_P(MinMax, NullPtr)
+{
+    cv::Mat h_src = randomMat(size, depth);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        double minVal, maxVal;
+        cv::gpu::minMax(loadMat(h_src, useRoi), &minVal, 0);
+        cv::gpu::minMax(loadMat(h_src, useRoi), 0, &maxVal);
+        // Each output was requested alone (the other pointer null); both are checked against the reference.
+        double minVal_gold, maxVal_gold;
+        minMaxLocGold(h_src, &minVal_gold, &maxVal_gold, 0, 0);
+
+        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
+        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            double minVal, maxVal;
+            cv::gpu::minMax(loadMat(h_src), &minVal, 0);
+            cv::gpu::minMax(loadMat(h_src), 0, &maxVal);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, MinMax, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// MinMaxLoc
+
+namespace
+{
+    template <typename T> // compares the element values found at the two locations, not the coordinates
+    void expectEqualImpl(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
+    {
+        EXPECT_EQ(src.at<T>(loc_gold.y, loc_gold.x), src.at<T>(loc.y, loc.x));
+    }
+
+    void expectEqual(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
+    {
+        // Dispatch table indexed by src.depth(); entries are ordered uchar .. double.
+        typedef void (*impl_t)(const cv::Mat&, cv::Point, cv::Point);
+        static const impl_t table[] =
+        {
+            expectEqualImpl<uchar>,
+            expectEqualImpl<schar>,
+            expectEqualImpl<ushort>,
+            expectEqualImpl<short>,
+            expectEqualImpl<int>,
+            expectEqualImpl<float>,
+            expectEqualImpl<double>
+        };
+
+        table[src.depth()](src, loc_gold, loc);
+    }
+}
+
+PARAM_TEST_CASE(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    bool useRoi;
+    // Copies the four test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MinMaxLoc, WithoutMask)
+{
+    cv::Mat h_src = randomMat(size, depth);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        double minVal, maxVal;
+        cv::Point minLoc, maxLoc;
+        cv::gpu::minMaxLoc(loadMat(h_src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc);
+
+        double minVal_gold, maxVal_gold;
+        cv::Point minLoc_gold, maxLoc_gold;
+        minMaxLocGold(h_src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);
+
+        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
+        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
+        // Locations are compared through the values stored there (see expectEqual's arguments).
+        expectEqual(h_src, minLoc_gold, minLoc);
+        expectEqual(h_src, maxLoc_gold, maxLoc);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+            cv::gpu::minMaxLoc(loadMat(h_src), &minVal, &maxVal, &minLoc, &maxLoc);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+GPU_TEST_P(MinMaxLoc, WithMask)
+{
+    cv::Mat h_src = randomMat(size, depth);
+    cv::Mat h_mask = randomMat(size, CV_8UC1, 0.0, 2.0);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        double minVal, maxVal;
+        cv::Point minLoc, maxLoc;
+        cv::gpu::minMaxLoc(loadMat(h_src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(h_mask, useRoi));
+
+        double minVal_gold, maxVal_gold;
+        cv::Point minLoc_gold, maxLoc_gold;
+        minMaxLocGold(h_src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold, h_mask);
+
+        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
+        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
+        // Locations are compared through the values stored there (see expectEqual's arguments).
+        expectEqual(h_src, minLoc_gold, minLoc);
+        expectEqual(h_src, maxLoc_gold, maxLoc);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+            cv::gpu::minMaxLoc(loadMat(h_src), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(h_mask));
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+GPU_TEST_P(MinMaxLoc, NullPtr)
+{
+    cv::Mat h_src = randomMat(size, depth);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        double minVal, maxVal;
+        cv::Point minLoc, maxLoc;
+        cv::gpu::minMaxLoc(loadMat(h_src, useRoi), &minVal, 0, 0, 0);
+        cv::gpu::minMaxLoc(loadMat(h_src, useRoi), 0, &maxVal, 0, 0);
+        cv::gpu::minMaxLoc(loadMat(h_src, useRoi), 0, 0, &minLoc, 0);
+        cv::gpu::minMaxLoc(loadMat(h_src, useRoi), 0, 0, 0, &maxLoc);
+        // Each output above was requested alone, with the other three pointers null.
+        double minVal_gold, maxVal_gold;
+        cv::Point minLoc_gold, maxLoc_gold;
+        minMaxLocGold(h_src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);
+
+        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
+        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
+
+        expectEqual(h_src, minLoc_gold, minLoc);
+        expectEqual(h_src, maxLoc_gold, maxLoc);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+            cv::gpu::minMaxLoc(loadMat(h_src, useRoi), &minVal, 0, 0, 0);
+            cv::gpu::minMaxLoc(loadMat(h_src, useRoi), 0, &maxVal, 0, 0);
+            cv::gpu::minMaxLoc(loadMat(h_src, useRoi), 0, 0, &minLoc, 0);
+            cv::gpu::minMaxLoc(loadMat(h_src, useRoi), 0, 0, 0, &maxLoc);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, MinMaxLoc, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////
+// CountNonZero
+
+PARAM_TEST_CASE(CountNonZero, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    bool useRoi;
+
+    // Copies the four test parameters into the members above and calls cv::gpu::setDevice with the device ID.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(CountNonZero, Accuracy)
+{
+    cv::Mat seed = randomMat(size, CV_8U, 0.0, 1.5);
+    cv::Mat converted;
+    seed.convertTo(converted, depth);
+
+    if (depth != CV_64F || supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        int val = cv::gpu::countNonZero(loadMat(converted, useRoi));
+        // The CPU count over the same converted matrix is the reference.
+        int val_gold = cv::countNonZero(converted);
+
+        ASSERT_EQ(val_gold, val);
+    }
+    else // CV_64F on a device without NATIVE_DOUBLE: only the code of a thrown cv::Exception is checked
+    {
+        try
+        {
+            cv::gpu::countNonZero(loadMat(converted));
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, CountNonZero, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////////////////////////////
+// Reduce
+
+CV_ENUM(ReduceCode, cv::REDUCE_SUM, cv::REDUCE_AVG, cv::REDUCE_MAX, cv::REDUCE_MIN)
+#define ALL_REDUCE_CODES testing::Values(ReduceCode(cv::REDUCE_SUM), ReduceCode(cv::REDUCE_AVG), ReduceCode(cv::REDUCE_MAX), ReduceCode(cv::REDUCE_MIN))
+
+PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, ReduceCode, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int depth;
+    int channels;
+    int reduceOp;
+    bool useRoi;
+    // The three members below are derived in SetUp from the parameters above.
+    int type;
+    int dst_depth;
+    int dst_type;
+    // Copies the six test parameters, calls cv::gpu::setDevice, then derives the source and destination types.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        depth = GET_PARAM(2);
+        channels = GET_PARAM(3);
+        reduceOp = GET_PARAM(4);
+        useRoi = GET_PARAM(5);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        type = CV_MAKE_TYPE(depth, channels);
+        // Destination depth: MAX/MIN keep the source depth; SUM uses CV_32S for CV_8U and CV_32F for other depths below CV_64F; any other op uses at least CV_32F.
+        if (reduceOp == cv::REDUCE_MAX || reduceOp == cv::REDUCE_MIN)
+            dst_depth = depth;
+        else if (reduceOp == cv::REDUCE_SUM)
+            dst_depth = depth == CV_8U ? CV_32S : depth < CV_64F ? CV_32F : depth;
+        else
+            dst_depth = depth < CV_32F ? CV_32F : depth;
+
+        dst_type = CV_MAKE_TYPE(dst_depth, channels);
+    }
+
+};
+
+GPU_TEST_P(Reduce, Rows)
+{
+    cv::Mat h_src = randomMat(size, type);
+
+    // The CPU reference (reduction with dim 0) is computed before the device call.
+    cv::Mat dst_gold;
+    cv::reduce(h_src, dst_gold, 0, reduceOp, dst_depth);
+
+    cv::gpu::GpuMat dst = createMat(cv::Size(h_src.cols, 1), dst_type, useRoi);
+    cv::gpu::reduce(loadMat(h_src, useRoi), dst, 0, reduceOp, dst_depth);
+    EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 0.0 : 0.02);
+}
+
+GPU_TEST_P(Reduce, Cols)
+{
+    cv::Mat src = randomMat(size, type);
+
+    cv::gpu::GpuMat dst = createMat(cv::Size(src.rows, 1), dst_type, useRoi);
+    cv::gpu::reduce(loadMat(src, useRoi), dst, 1, reduceOp, dst_depth);
+
+    cv::Mat dst_gold;
+    cv::reduce(src, dst_gold, 1, reduceOp, dst_depth);
+    // The CPU result is a column with one element per source row, while dst was created as a single
+    // row of src.rows elements; reshape() re-headers it as 1 x N without hand-editing cols/rows/step.
+    dst_gold = dst_gold.reshape(0, 1);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 0.0 : 0.02);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Reduce, testing::Combine( // parameter order matches the PARAM_TEST_CASE(Reduce, ...) declaration
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatDepth(CV_8U), // depths exercised; CV_8S and CV_32S are not in this list
+                    MatDepth(CV_16U),
+                    MatDepth(CV_16S),
+                    MatDepth(CV_32F),
+                    MatDepth(CV_64F)),
+    ALL_CHANNELS,
+    ALL_REDUCE_CODES,
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////////////////////////////
+// Normalize
+
+PARAM_TEST_CASE(Normalize, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi) // fixture shared by the cv::gpu::normalize tests
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type; // filled from the MatDepth parameter and used directly as the matrix type
+    int norm_type;
+    bool useRoi;
+
+    double alpha; // passed unchanged to both normalize() calls
+    double beta; // passed unchanged to both normalize() calls
+
+    virtual void SetUp() // unpacks the test parameters and fixes alpha/beta
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        norm_type = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device given by the first parameter
+
+        alpha = 1;
+        beta = 0;
+    }
+
+};
+
+GPU_TEST_P(Normalize, WithOutMask) // cv::gpu::normalize without a mask, checked against cv::normalize
+{
+    cv::Mat src = randomMat(size, type);
+
+    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::gpu::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type);
+
+    cv::Mat dst_gold; // CPU reference result
+    cv::normalize(src, dst_gold, alpha, beta, norm_type, type);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
+}
+
+GPU_TEST_P(Normalize, WithMask) // cv::gpu::normalize with a mask, checked against cv::normalize
+{
+    cv::Mat src = randomMat(size, type);
+    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2); // random 8-bit mask generated with bounds 0 and 2
+
+    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    dst.setTo(cv::Scalar::all(0)); // zero-fill first; presumably so elements outside the mask compare equal
+    cv::gpu::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type, loadMat(mask, useRoi));
+
+    cv::Mat dst_gold(size, type); // CPU reference, pre-allocated and zero-filled like dst
+    dst_gold.setTo(cv::Scalar::all(0));
+    cv::normalize(src, dst_gold, alpha, beta, norm_type, type, mask);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, Normalize, testing::Combine( // parameter order matches the PARAM_TEST_CASE(Normalize, ...) declaration
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    ALL_DEPTH,
+    testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF), NormCode(cv::NORM_MINMAX)), // the four norm types exercised
+    WHOLE_SUBMAT));
+
+////////////////////////////////////////////////////////////////////////////////
+// MeanStdDev
+
+PARAM_TEST_CASE(MeanStdDev, cv::gpu::DeviceInfo, cv::Size, UseRoi) // fixture for the cv::gpu::meanStdDev test
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    bool useRoi;
+
+    virtual void SetUp() // unpacks the test parameters and selects the device
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MeanStdDev, Accuracy) // cv::gpu::meanStdDev on CV_8UC1 input, checked against cv::meanStdDev
+{
+    cv::Mat src = randomMat(size, CV_8UC1);
+
+    if (!supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_13)) // device does not report FEATURE_SET_COMPUTE_13
+    {
+        try
+        {
+            cv::Scalar mean;
+            cv::Scalar stddev;
+            cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(cv::Error::StsNotImplemented, e.code); // NOTE(review): this branch also passes when nothing is thrown
+        }
+    }
+    else
+    {
+        cv::Scalar mean;
+        cv::Scalar stddev;
+        cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
+
+        cv::Scalar mean_gold; // CPU reference results
+        cv::Scalar stddev_gold;
+        cv::meanStdDev(src, mean_gold, stddev_gold);
+
+        EXPECT_SCALAR_NEAR(mean_gold, mean, 1e-5);
+        EXPECT_SCALAR_NEAR(stddev_gold, stddev, 1e-5);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Arithm, MeanStdDev, testing::Combine( // parameter order matches the PARAM_TEST_CASE(MeanStdDev, ...) declaration
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    WHOLE_SUBMAT));
+
+#endif // HAVE_CUDA

From c0b3424a23eb41b25c19b87997734315944fe0d5 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:46:28 +0400
Subject: [PATCH 35/49] gpuimgproc module fixes

---
 modules/gpuimgproc/CMakeLists.txt             |    4 +-
 modules/gpuimgproc/doc/color.rst              |   74 ++
 modules/gpuimgproc/doc/feature_detection.rst  |   81 ++
 modules/gpuimgproc/doc/gpuimgproc.rst         |   12 +-
 modules/gpuimgproc/doc/histogram.rst          |  104 ++
 modules/gpuimgproc/doc/hough.rst              |   96 ++
 modules/gpuimgproc/doc/image_processing.rst   |  589 ---------
 modules/gpuimgproc/doc/imgproc.rst            |  203 +++
 .../gpuimgproc/include/opencv2/gpuimgproc.hpp |  213 ++--
 .../gpuimgproc/perf/perf_bilateral_filter.cpp |   93 ++
 modules/gpuimgproc/perf/perf_blend.cpp        |   86 ++
 modules/gpuimgproc/perf/perf_canny.cpp        |   87 ++
 modules/gpuimgproc/perf/perf_color.cpp        |  252 ++++
 modules/gpuimgproc/perf/perf_corners.cpp      |  137 ++
 modules/gpuimgproc/perf/perf_gftt.cpp         |   86 ++
 modules/gpuimgproc/perf/perf_histogram.cpp    |  221 ++++
 modules/gpuimgproc/perf/perf_hough.cpp        |  317 +++++
 modules/gpuimgproc/perf/perf_imgproc.cpp      | 1133 -----------------
 .../gpuimgproc/perf/perf_match_template.cpp   |  131 ++
 modules/gpuimgproc/perf/perf_mean_shift.cpp   |  152 +++
 modules/gpuimgproc/src/blend.cpp              |    3 +
 modules/gpuimgproc/src/canny.cpp              |  186 +++
 modules/gpuimgproc/src/color.cpp              |   93 +-
 modules/gpuimgproc/src/corners.cpp            |  149 +++
 .../src/cuda/{imgproc.cu => corners.cu}       |  138 +-
 modules/gpuimgproc/src/cuda/mean_shift.cu     |  182 +++
 modules/gpuimgproc/src/gftt.cpp               |    9 +-
 .../src/{imgproc.cpp => histogram.cpp}        |  408 +-----
 modules/gpuimgproc/src/match_template.cpp     |   46 +-
 modules/gpuimgproc/src/mean_shift.cpp         |  128 ++
 modules/gpuimgproc/src/precomp.hpp            |    7 +-
 .../gpuimgproc/test/test_bilateral_filter.cpp |   97 ++
 modules/gpuimgproc/test/test_blend.cpp        |  124 ++
 modules/gpuimgproc/test/test_canny.cpp        |  114 ++
 modules/gpuimgproc/test/test_corners.cpp      |  145 +++
 modules/gpuimgproc/test/test_gftt.cpp         |  131 ++
 modules/gpuimgproc/test/test_histogram.cpp    |  227 ++++
 modules/gpuimgproc/test/test_imgproc.cpp      |  890 -------------
 .../gpuimgproc/test/test_match_template.cpp   |  305 +++++
 modules/gpuimgproc/test/test_mean_shift.cpp   |  174 +++
 40 files changed, 4364 insertions(+), 3263 deletions(-)
 create mode 100644 modules/gpuimgproc/doc/color.rst
 create mode 100644 modules/gpuimgproc/doc/feature_detection.rst
 create mode 100644 modules/gpuimgproc/doc/histogram.rst
 create mode 100644 modules/gpuimgproc/doc/hough.rst
 delete mode 100644 modules/gpuimgproc/doc/image_processing.rst
 create mode 100644 modules/gpuimgproc/doc/imgproc.rst
 create mode 100644 modules/gpuimgproc/perf/perf_bilateral_filter.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_blend.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_canny.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_color.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_corners.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_gftt.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_histogram.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_hough.cpp
 delete mode 100644 modules/gpuimgproc/perf/perf_imgproc.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_match_template.cpp
 create mode 100644 modules/gpuimgproc/perf/perf_mean_shift.cpp
 create mode 100644 modules/gpuimgproc/src/canny.cpp
 create mode 100644 modules/gpuimgproc/src/corners.cpp
 rename modules/gpuimgproc/src/cuda/{imgproc.cu => corners.cu} (65%)
 create mode 100644 modules/gpuimgproc/src/cuda/mean_shift.cu
 rename modules/gpuimgproc/src/{imgproc.cpp => histogram.cpp} (60%)
 create mode 100644 modules/gpuimgproc/src/mean_shift.cpp
 create mode 100644 modules/gpuimgproc/test/test_bilateral_filter.cpp
 create mode 100644 modules/gpuimgproc/test/test_blend.cpp
 create mode 100644 modules/gpuimgproc/test/test_canny.cpp
 create mode 100644 modules/gpuimgproc/test/test_corners.cpp
 create mode 100644 modules/gpuimgproc/test/test_gftt.cpp
 create mode 100644 modules/gpuimgproc/test/test_histogram.cpp
 delete mode 100644 modules/gpuimgproc/test/test_imgproc.cpp
 create mode 100644 modules/gpuimgproc/test/test_match_template.cpp
 create mode 100644 modules/gpuimgproc/test/test_mean_shift.cpp

diff --git a/modules/gpuimgproc/CMakeLists.txt b/modules/gpuimgproc/CMakeLists.txt
index 19a66dca9..86bcc0335 100644
--- a/modules/gpuimgproc/CMakeLists.txt
+++ b/modules/gpuimgproc/CMakeLists.txt
@@ -4,6 +4,6 @@ endif()
 
 set(the_description "GPU-accelerated Image Processing")
 
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
-ocv_define_module(gpuimgproc opencv_imgproc opencv_gpuarithm opencv_gpufilters)
+ocv_define_module(gpuimgproc opencv_imgproc opencv_gpufilters OPTIONAL opencv_gpuarithm)
diff --git a/modules/gpuimgproc/doc/color.rst b/modules/gpuimgproc/doc/color.rst
new file mode 100644
index 000000000..70de236ea
--- /dev/null
+++ b/modules/gpuimgproc/doc/color.rst
@@ -0,0 +1,74 @@
+Color space processing
+======================
+
+.. highlight:: cpp
+
+
+
+gpu::cvtColor
+-----------------
+Converts an image from one color space to another.
+
+.. ocv:function:: void gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null())
+
+    :param src: Source image with  ``CV_8U`` , ``CV_16U`` , or  ``CV_32F`` depth and 1, 3, or 4 channels.
+
+    :param dst: Destination image with the same size and depth as  ``src`` .
+
+    :param code: Color space conversion code. For details, see  :ocv:func:`cvtColor` . Conversion to/from Luv and Bayer color spaces is not supported.
+
+    :param dcn: Number of channels in the destination image. If the parameter is 0, the number of the channels is derived automatically from  ``src`` and the  ``code`` .
+
+    :param stream: Stream for the asynchronous version.
+
+3-channel color spaces (like ``HSV``, ``XYZ``, and so on) can be stored in a 4-channel image for better performance.
+
+.. seealso:: :ocv:func:`cvtColor`
+
+
+
+gpu::swapChannels
+-----------------
+Exchanges the color channels of an image in-place.
+
+.. ocv:function:: void gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null())
+
+    :param image: Source image. Supports only ``CV_8UC4`` type.
+
+    :param dstOrder: Integer array describing how channel values are permuted. The n-th entry of the array contains the number of the channel that is stored in the n-th channel of the output image. E.g., given an RGBA image, ``dstOrder = [3,2,1,0]`` converts this to ABGR channel order.
+
+    :param stream: Stream for the asynchronous version.
+
+The method supports arbitrary permutations of the original channels, including replication.
+
+
+
+gpu::alphaComp
+-------------------
+Composites two images using alpha opacity values contained in each image.
+
+.. ocv:function:: void gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null())
+
+    :param img1: First image. Supports ``CV_8UC4`` , ``CV_16UC4`` , ``CV_32SC4`` and ``CV_32FC4`` types.
+
+    :param img2: Second image. Must have the same size and the same type as ``img1`` .
+
+    :param dst: Destination image.
+
+    :param alpha_op: Flag specifying the alpha-blending operation:
+
+            * **ALPHA_OVER**
+            * **ALPHA_IN**
+            * **ALPHA_OUT**
+            * **ALPHA_ATOP**
+            * **ALPHA_XOR**
+            * **ALPHA_PLUS**
+            * **ALPHA_OVER_PREMUL**
+            * **ALPHA_IN_PREMUL**
+            * **ALPHA_OUT_PREMUL**
+            * **ALPHA_ATOP_PREMUL**
+            * **ALPHA_XOR_PREMUL**
+            * **ALPHA_PLUS_PREMUL**
+            * **ALPHA_PREMUL**
+
+    :param stream: Stream for the asynchronous version.
diff --git a/modules/gpuimgproc/doc/feature_detection.rst b/modules/gpuimgproc/doc/feature_detection.rst
new file mode 100644
index 000000000..c38b8c200
--- /dev/null
+++ b/modules/gpuimgproc/doc/feature_detection.rst
@@ -0,0 +1,81 @@
+Feature Detection
+=================
+
+.. highlight:: cpp
+
+
+
+gpu::cornerHarris
+---------------------
+Computes the Harris cornerness criterion at each image pixel.
+
+.. ocv:function:: void gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101)
+
+    :param src: Source image. Only  ``CV_8UC1`` and  ``CV_32FC1`` images are supported for now.
+
+    :param dst: Destination image containing cornerness values. It has the same size as ``src`` and ``CV_32FC1`` type.
+
+    :param blockSize: Neighborhood size.
+
+    :param ksize: Aperture parameter for the Sobel operator.
+
+    :param k: Harris detector free parameter.
+
+    :param borderType: Pixel extrapolation method. Only  ``BORDER_REFLECT101`` and  ``BORDER_REPLICATE`` are supported for now.
+
+.. seealso:: :ocv:func:`cornerHarris`
+
+
+
+gpu::cornerMinEigenVal
+--------------------------
+Computes the minimum eigenvalue of a 2x2 derivative covariance matrix at each pixel (the cornerness criterion).
+
+.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101)
+
+.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101)
+
+.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null())
+
+    :param src: Source image. Only  ``CV_8UC1`` and  ``CV_32FC1`` images are supported for now.
+
+    :param dst: Destination image containing cornerness values. It has the same size as ``src`` and ``CV_32FC1`` type.
+
+    :param blockSize: Neighborhood size.
+
+    :param ksize: Aperture parameter for the Sobel operator.
+
+    :param borderType: Pixel extrapolation method. Only ``BORDER_REFLECT101`` and ``BORDER_REPLICATE`` are supported for now.
+
+.. seealso:: :ocv:func:`cornerMinEigenVal`
+
+
+
+gpu::GoodFeaturesToTrackDetector_GPU
+------------------------------------
+.. ocv:class:: gpu::GoodFeaturesToTrackDetector_GPU
+
+Class used for detecting strong corners in an image. ::
+
+    class GoodFeaturesToTrackDetector_GPU
+    {
+    public:
+        explicit GoodFeaturesToTrackDetector_GPU(int maxCorners_ = 1000, double qualityLevel_ = 0.01, double minDistance_ = 0.0,
+            int blockSize_ = 3, bool useHarrisDetector_ = false, double harrisK_ = 0.04);
+
+        void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
+
+        int maxCorners;
+        double qualityLevel;
+        double minDistance;
+
+        int blockSize;
+        bool useHarrisDetector;
+        double harrisK;
+
+        void releaseMemory();
+    };
+
+The class finds the most prominent corners in the image.
+
+.. seealso:: :ocv:func:`goodFeaturesToTrack`
diff --git a/modules/gpuimgproc/doc/gpuimgproc.rst b/modules/gpuimgproc/doc/gpuimgproc.rst
index d4cba96a4..827b735f5 100644
--- a/modules/gpuimgproc/doc/gpuimgproc.rst
+++ b/modules/gpuimgproc/doc/gpuimgproc.rst
@@ -1,8 +1,12 @@
-*************************************
-gpu. GPU-accelerated Image Processing
-*************************************
+********************************************
+gpuimgproc. GPU-accelerated Image Processing
+********************************************
 
 .. toctree::
     :maxdepth: 1
 
-    image_processing
+    color
+    histogram
+    hough
+    feature_detection
+    imgproc
diff --git a/modules/gpuimgproc/doc/histogram.rst b/modules/gpuimgproc/doc/histogram.rst
new file mode 100644
index 000000000..7b29de6ba
--- /dev/null
+++ b/modules/gpuimgproc/doc/histogram.rst
@@ -0,0 +1,104 @@
+Histogram Calculation
+=====================
+
+.. highlight:: cpp
+
+
+
+gpu::evenLevels
+-------------------
+Computes levels with even distribution.
+
+.. ocv:function:: void gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)
+
+    :param levels: Destination array.  ``levels`` has 1 row, ``nLevels`` columns, and the ``CV_32SC1`` type.
+
+    :param nLevels: Number of computed levels.  ``nLevels`` must be at least 2.
+
+    :param lowerLevel: Lower boundary value of the lowest level.
+
+    :param upperLevel: Upper boundary value of the greatest level.
+
+
+
+gpu::histEven
+-----------------
+Calculates a histogram with evenly distributed bins.
+
+.. ocv:function:: void gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::histEven( const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream=Stream::Null() )
+
+.. ocv:function:: void gpu::histEven( const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream=Stream::Null() )
+
+    :param src: Source image. ``CV_8U``, ``CV_16U``, or ``CV_16S`` depth and 1 or 4 channels are supported. For a four-channel image, all channels are processed separately.
+
+    :param hist: Destination histogram with one row, ``histSize`` columns, and the ``CV_32S`` type.
+
+    :param histSize: Size of the histogram.
+
+    :param lowerLevel: Lower boundary of lowest-level bin.
+
+    :param upperLevel: Upper boundary of highest-level bin.
+
+    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::histRange
+------------------
+Calculates a histogram with bins determined by the ``levels`` array.
+
+.. ocv:function:: void gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null())
+
+    :param src: Source image. ``CV_8U`` , ``CV_16U`` , or  ``CV_16S`` depth and 1 or 4 channels are supported. For a four-channel image, all channels are processed separately.
+
+    :param hist: Destination histogram with one row, ``(levels.cols-1)`` columns, and the  ``CV_32SC1`` type.
+
+    :param levels: Array of level boundaries that determine the histogram bins.
+
+    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::calcHist
+------------------
+Calculates a histogram for a single-channel 8-bit image.
+
+.. ocv:function:: void gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null())
+
+    :param src: Source image.
+
+    :param hist: Destination histogram with one row, 256 columns, and the  ``CV_32SC1`` type.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::equalizeHist
+------------------
+Equalizes the histogram of a grayscale image.
+
+.. ocv:function:: void gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null())
+
+    :param src: Source image.
+
+    :param dst: Destination image.
+
+    :param hist: Destination histogram with one row, 256 columns, and the  ``CV_32SC1`` type.
+
+    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`equalizeHist`
diff --git a/modules/gpuimgproc/doc/hough.rst b/modules/gpuimgproc/doc/hough.rst
new file mode 100644
index 000000000..33afabbb6
--- /dev/null
+++ b/modules/gpuimgproc/doc/hough.rst
@@ -0,0 +1,96 @@
+Hough Transform
+===============
+
+.. highlight:: cpp
+
+
+
+gpu::HoughLines
+---------------
+Finds lines in a binary image using the classical Hough transform.
+
+.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096)
+
+.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096)
+
+    :param src: 8-bit, single-channel binary source image.
+
+    :param lines: Output vector of lines. Each line is represented by a two-element vector  :math:`(\rho, \theta)` .  :math:`\rho`  is the distance from the coordinate origin  :math:`(0,0)`  (top-left corner of the image).  :math:`\theta`  is the line rotation angle in radians ( :math:`0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}` ).
+
+    :param rho: Distance resolution of the accumulator in pixels.
+
+    :param theta: Angle resolution of the accumulator in radians.
+
+    :param threshold: Accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` ).
+
+    :param doSort: If true, the output lines are sorted by votes.
+
+    :param maxLines: Maximum number of output lines.
+
+    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
+
+.. seealso:: :ocv:func:`HoughLines`
+
+
+
+gpu::HoughLinesDownload
+-----------------------
+Downloads results from :ocv:func:`gpu::HoughLines` to host memory.
+
+.. ocv:function:: void gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray())
+
+    :param d_lines: Result of :ocv:func:`gpu::HoughLines` .
+
+    :param h_lines: Output host array.
+
+    :param h_votes: Optional output array for the lines' votes.
+
+.. seealso:: :ocv:func:`gpu::HoughLines`
+
+
+
+gpu::HoughCircles
+-----------------
+Finds circles in a grayscale image using the Hough transform.
+
+.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
+
+.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
+
+    :param src: 8-bit, single-channel grayscale input image.
+
+    :param circles: Output vector of found circles. Each circle is encoded as a 3-element floating-point vector  :math:`(x, y, radius)` .
+
+    :param method: Detection method to use. Currently, the only implemented method is  ``CV_HOUGH_GRADIENT`` , which is basically  *21HT* , described in  [Yuen90]_.
+
+    :param dp: Inverse ratio of the accumulator resolution to the image resolution. For example, if  ``dp=1`` , the accumulator has the same resolution as the input image. If  ``dp=2`` , the accumulator has half the width and height of the input image.
+
+    :param minDist: Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed.
+
+    :param cannyThreshold: The higher of the two thresholds passed to the :ocv:func:`gpu::Canny` edge detector (the lower one is half as large).
+
+    :param votesThreshold: The accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected.
+
+    :param minRadius: Minimum circle radius.
+
+    :param maxRadius: Maximum circle radius.
+
+    :param maxCircles: Maximum number of output circles.
+
+    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
+
+.. seealso:: :ocv:func:`HoughCircles`
+
+
+
+gpu::HoughCirclesDownload
+-------------------------
+Downloads results from :ocv:func:`gpu::HoughCircles` to host memory.
+
+.. ocv:function:: void gpu::HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles)
+
+    :param d_circles: Result of :ocv:func:`gpu::HoughCircles` .
+
+    :param h_circles: Output host array.
+
+.. seealso:: :ocv:func:`gpu::HoughCircles`
diff --git a/modules/gpuimgproc/doc/image_processing.rst b/modules/gpuimgproc/doc/image_processing.rst
deleted file mode 100644
index 352288672..000000000
--- a/modules/gpuimgproc/doc/image_processing.rst
+++ /dev/null
@@ -1,589 +0,0 @@
-Image Processing
-================
-
-.. highlight:: cpp
-
-
-
-gpu::meanShiftFiltering
----------------------------
-Performs mean-shift filtering for each point of the source image.
-
-.. ocv:function:: void gpu::meanShiftFiltering( const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria=TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream=Stream::Null() )
-
-    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
-
-    :param dst: Destination image containing the color of mapped points. It has the same size and type as  ``src`` .
-
-    :param sp: Spatial window radius.
-
-    :param sr: Color window radius.
-
-    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
-
-It maps each point of the source image into another point. As a result, you have a new color and new position of each point.
-
-
-
-gpu::meanShiftProc
-----------------------
-Performs a mean-shift procedure and stores information about processed points (their colors and positions) in two images.
-
-.. ocv:function:: void gpu::meanShiftProc( const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria=TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream=Stream::Null() )
-
-    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
-
-    :param dstr: Destination image containing the color of mapped points. The size and type is the same as  ``src`` .
-
-    :param dstsp: Destination image containing the position of mapped points. The size is the same as  ``src`` size. The type is  ``CV_16SC2`` .
-
-    :param sp: Spatial window radius.
-
-    :param sr: Color window radius.
-
-    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
-
-.. seealso:: :ocv:func:`gpu::meanShiftFiltering`
-
-
-
-gpu::meanShiftSegmentation
-------------------------------
-Performs a mean-shift segmentation of the source image and eliminates small segments.
-
-.. ocv:function:: void gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
-
-    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
-
-    :param dst: Segmented image with the same size and type as  ``src`` .
-
-    :param sp: Spatial window radius.
-
-    :param sr: Color window radius.
-
-    :param minsize: Minimum segment size. Smaller segments are merged.
-
-    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
-
-
-
-gpu::integral
------------------
-Computes an integral image.
-
-.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null())
-
-    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
-
-    :param sum: Integral image containing 32-bit unsigned integer values packed into  ``CV_32SC1`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`integral`
-
-
-
-gpu::sqrIntegral
---------------------
-Computes a squared integral image.
-
-.. ocv:function:: void gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null())
-
-    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
-
-    :param sqsum: Squared integral image containing 64-bit unsigned integer values packed into  ``CV_64FC1`` .
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::cornerHarris
----------------------
-Computes the Harris cornerness criteria at each image pixel.
-
-.. ocv:function:: void gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101)
-
-    :param src: Source image. Only  ``CV_8UC1`` and  ``CV_32FC1`` images are supported for now.
-
-    :param dst: Destination image containing cornerness values. It has the same size as ``src`` and ``CV_32FC1`` type.
-
-    :param blockSize: Neighborhood size.
-
-    :param ksize: Aperture parameter for the Sobel operator.
-
-    :param k: Harris detector free parameter.
-
-    :param borderType: Pixel extrapolation method. Only  ``BORDER_REFLECT101`` and  ``BORDER_REPLICATE`` are supported for now.
-
-.. seealso:: :ocv:func:`cornerHarris`
-
-
-
-gpu::cornerMinEigenVal
---------------------------
-Computes the minimum eigen value of a 2x2 derivative covariation matrix at each pixel (the cornerness criteria).
-
-.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101)
-
-.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101)
-
-.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null())
-
-    :param src: Source image. Only  ``CV_8UC1`` and  ``CV_32FC1`` images are supported for now.
-
-    :param dst: Destination image containing cornerness values. The size is the same. The type is  ``CV_32FC1`` .
-
-    :param blockSize: Neighborhood size.
-
-    :param ksize: Aperture parameter for the Sobel operator.
-
-    :param borderType: Pixel extrapolation method. Only ``BORDER_REFLECT101`` and ``BORDER_REPLICATE`` are supported for now.
-
-.. seealso:: :ocv:func:`cornerMinEigenVal`
-
-
-
-gpu::MatchTemplateBuf
----------------------
-.. ocv:struct:: gpu::MatchTemplateBuf
-
-Class providing memory buffers for :ocv:func:`gpu::matchTemplate` function, plus it allows to adjust some specific parameters. ::
-
-    struct CV_EXPORTS MatchTemplateBuf
-    {
-        Size user_block_size;
-        GpuMat imagef, templf;
-        std::vector<GpuMat> images;
-        std::vector<GpuMat> image_sums;
-        std::vector<GpuMat> image_sqsums;
-    };
-
-You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::matchTemplate` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
-
-
-
-gpu::matchTemplate
-----------------------
-Computes a proximity map for a raster template and an image where the template is searched for.
-
-.. ocv:function:: void gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null())
-
-.. ocv:function:: void gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null())
-
-    :param image: Source image.  ``CV_32F`` and  ``CV_8U`` depth images (1..4 channels) are supported for now.
-
-    :param templ: Template image with the size and type the same as  ``image`` .
-
-    :param result: Map containing comparison results ( ``CV_32FC1`` ). If  ``image`` is  *W x H*  and ``templ`` is  *w x h*, then  ``result`` must be *W-w+1 x H-h+1*.
-
-    :param method: Specifies the way to compare the template with the image.
-
-    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::MatchTemplateBuf`.
-
-    :param stream: Stream for the asynchronous version.
-
-    The following methods are supported for the ``CV_8U`` depth images for now:
-
-    * ``CV_TM_SQDIFF``
-    * ``CV_TM_SQDIFF_NORMED``
-    * ``CV_TM_CCORR``
-    * ``CV_TM_CCORR_NORMED``
-    * ``CV_TM_CCOEFF``
-    * ``CV_TM_CCOEFF_NORMED``
-
-    The following methods are supported for the ``CV_32F`` images for now:
-
-    * ``CV_TM_SQDIFF``
-    * ``CV_TM_CCORR``
-
-.. seealso:: :ocv:func:`matchTemplate`
-
-
-
-gpu::cvtColor
------------------
-Converts an image from one color space to another.
-
-.. ocv:function:: void gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null())
-
-    :param src: Source image with  ``CV_8U`` , ``CV_16U`` , or  ``CV_32F`` depth and 1, 3, or 4 channels.
-
-    :param dst: Destination image with the same size and depth as  ``src`` .
-
-    :param code: Color space conversion code. For details, see  :ocv:func:`cvtColor` . Conversion to/from Luv and Bayer color spaces is not supported.
-
-    :param dcn: Number of channels in the destination image. If the parameter is 0, the number of the channels is derived automatically from  ``src`` and the  ``code`` .
-
-    :param stream: Stream for the asynchronous version.
-
-3-channel color spaces (like ``HSV``, ``XYZ``, and so on) can be stored in a 4-channel image for better performance.
-
-.. seealso:: :ocv:func:`cvtColor`
-
-
-
-gpu::swapChannels
------------------
-Exchanges the color channels of an image in-place.
-
-.. ocv:function:: void gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null())
-
-    :param image: Source image. Supports only ``CV_8UC4`` type.
-
-    :param dstOrder: Integer array describing how channel values are permutated. The n-th entry of the array contains the number of the channel that is stored in the n-th channel of the output image. E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR channel order.
-
-    :param stream: Stream for the asynchronous version.
-
-The methods support arbitrary permutations of the original channels, including replication.
-
-
-
-gpu::rectStdDev
--------------------
-Computes a standard deviation of integral images.
-
-.. ocv:function:: void gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null())
-
-    :param src: Source image. Only the ``CV_32SC1`` type is supported.
-
-    :param sqr: Squared source image. Only  the ``CV_32FC1`` type is supported.
-
-    :param dst: Destination image with the same type and size as  ``src`` .
-
-    :param rect: Rectangular window.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::evenLevels
--------------------
-Computes levels with even distribution.
-
-.. ocv:function:: void gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)
-
-    :param levels: Destination array.  ``levels`` has 1 row, ``nLevels`` columns, and the ``CV_32SC1`` type.
-
-    :param nLevels: Number of computed levels.  ``nLevels`` must be at least 2.
-
-    :param lowerLevel: Lower boundary value of the lowest level.
-
-    :param upperLevel: Upper boundary value of the greatest level.
-
-
-
-gpu::histEven
------------------
-Calculates a histogram with evenly distributed bins.
-
-.. ocv:function:: void gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::histEven( const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream=Stream::Null() )
-
-.. ocv:function:: void gpu::histEven( const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream=Stream::Null() )
-
-    :param src: Source image. ``CV_8U``, ``CV_16U``, or ``CV_16S`` depth and 1 or 4 channels are supported. For a four-channel image, all channels are processed separately.
-
-    :param hist: Destination histogram with one row, ``histSize`` columns, and the ``CV_32S`` type.
-
-    :param histSize: Size of the histogram.
-
-    :param lowerLevel: Lower boundary of lowest-level bin.
-
-    :param upperLevel: Upper boundary of highest-level bin.
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::histRange
-------------------
-Calculates a histogram with bins determined by the ``levels`` array.
-
-.. ocv:function:: void gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null())
-
-    :param src: Source image. ``CV_8U`` , ``CV_16U`` , or  ``CV_16S`` depth and 1 or 4 channels are supported. For a four-channel image, all channels are processed separately.
-
-    :param hist: Destination histogram with one row, ``(levels.cols-1)`` columns, and the  ``CV_32SC1`` type.
-
-    :param levels: Number of levels in the histogram.
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::calcHist
-------------------
-Calculates histogram for one channel 8-bit image.
-
-.. ocv:function:: void gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null())
-
-    :param src: Source image.
-
-    :param hist: Destination histogram with one row, 256 columns, and the  ``CV_32SC1`` type.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::equalizeHist
-------------------
-Equalizes the histogram of a grayscale image.
-
-.. ocv:function:: void gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null())
-
-    :param src: Source image.
-
-    :param dst: Destination image.
-
-    :param hist: Destination histogram with one row, 256 columns, and the  ``CV_32SC1`` type.
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`equalizeHist`
-
-
-
-gpu::blendLinear
--------------------
-Performs linear blending of two images.
-
-.. ocv:function:: void gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, GpuMat& result, Stream& stream = Stream::Null())
-
-    :param img1: First image. Supports only ``CV_8U`` and ``CV_32F`` depth.
-
-    :param img2: Second image. Must have the same size and the same type as ``img1`` .
-
-    :param weights1: Weights for first image. Must have tha same size as ``img1`` . Supports only ``CV_32F`` type.
-
-    :param weights2: Weights for second image. Must have tha same size as ``img2`` . Supports only ``CV_32F`` type.
-
-    :param result: Destination image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-gpu::bilateralFilter
---------------------
-Performs bilateral filtering of passed image
-
-.. ocv:function:: void gpu::bilateralFilter( const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode=BORDER_DEFAULT, Stream& stream=Stream::Null() )
-
-    :param src: Source image. Supports only (channles != 2 && depth() != CV_8S && depth() != CV_32S && depth() != CV_64F).
-
-    :param dst: Destination imagwe.
-
-    :param kernel_size: Kernel window size.
-
-    :param sigma_color: Filter sigma in the color space.
-
-    :param sigma_spatial:  Filter sigma in the coordinate space.
-
-    :param borderMode:  Border type. See :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso::
-
-    :ocv:func:`bilateralFilter`
-
-
-
-gpu::alphaComp
--------------------
-Composites two images using alpha opacity values contained in each image.
-
-.. ocv:function:: void gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null())
-
-    :param img1: First image. Supports ``CV_8UC4`` , ``CV_16UC4`` , ``CV_32SC4`` and ``CV_32FC4`` types.
-
-    :param img2: Second image. Must have the same size and the same type as ``img1`` .
-
-    :param dst: Destination image.
-
-    :param alpha_op: Flag specifying the alpha-blending operation:
-
-            * **ALPHA_OVER**
-            * **ALPHA_IN**
-            * **ALPHA_OUT**
-            * **ALPHA_ATOP**
-            * **ALPHA_XOR**
-            * **ALPHA_PLUS**
-            * **ALPHA_OVER_PREMUL**
-            * **ALPHA_IN_PREMUL**
-            * **ALPHA_OUT_PREMUL**
-            * **ALPHA_ATOP_PREMUL**
-            * **ALPHA_XOR_PREMUL**
-            * **ALPHA_PLUS_PREMUL**
-            * **ALPHA_PREMUL**
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::Canny
--------------------
-Finds edges in an image using the [Canny86]_ algorithm.
-
-.. ocv:function:: void gpu::Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
-
-.. ocv:function:: void gpu::Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
-
-.. ocv:function:: void gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
-
-.. ocv:function:: void gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
-
-    :param image: Single-channel 8-bit input image.
-
-    :param dx: First derivative of image in the vertical direction. Support only ``CV_32S`` type.
-
-    :param dy: First derivative of image in the horizontal direction. Support only ``CV_32S`` type.
-
-    :param edges: Output edge map. It has the same size and type as  ``image`` .
-
-    :param low_thresh: First threshold for the hysteresis procedure.
-
-    :param high_thresh: Second threshold for the hysteresis procedure.
-
-    :param apperture_size: Aperture size for the  :ocv:func:`Sobel`  operator.
-
-    :param L2gradient: Flag indicating whether a more accurate  :math:`L_2`  norm  :math:`=\sqrt{(dI/dx)^2 + (dI/dy)^2}`  should be used to compute the image gradient magnitude ( ``L2gradient=true`` ), or a faster default  :math:`L_1`  norm  :math:`=|dI/dx|+|dI/dy|`  is enough ( ``L2gradient=false`` ).
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-.. seealso:: :ocv:func:`Canny`
-
-
-
-gpu::HoughLines
----------------
-Finds lines in a binary image using the classical Hough transform.
-
-.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096)
-
-.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096)
-
-    :param src: 8-bit, single-channel binary source image.
-
-    :param lines: Output vector of lines. Each line is represented by a two-element vector  :math:`(\rho, \theta)` .  :math:`\rho`  is the distance from the coordinate origin  :math:`(0,0)`  (top-left corner of the image).  :math:`\theta`  is the line rotation angle in radians ( :math:`0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}` ).
-
-    :param rho: Distance resolution of the accumulator in pixels.
-
-    :param theta: Angle resolution of the accumulator in radians.
-
-    :param threshold: Accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` ).
-
-    :param doSort: Performs lines sort by votes.
-
-    :param maxLines: Maximum number of output lines.
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-.. seealso:: :ocv:func:`HoughLines`
-
-
-
-gpu::HoughLinesDownload
------------------------
-Downloads results from :ocv:func:`gpu::HoughLines` to host memory.
-
-.. ocv:function:: void gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray())
-
-    :param d_lines: Result of :ocv:func:`gpu::HoughLines` .
-
-    :param h_lines: Output host array.
-
-    :param h_votes: Optional output array for line's votes.
-
-.. seealso:: :ocv:func:`gpu::HoughLines`
-
-
-
-gpu::HoughCircles
------------------
-Finds circles in a grayscale image using the Hough transform.
-
-.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
-
-.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
-
-    :param src: 8-bit, single-channel grayscale input image.
-
-    :param circles: Output vector of found circles. Each vector is encoded as a 3-element floating-point vector  :math:`(x, y, radius)` .
-
-    :param method: Detection method to use. Currently, the only implemented method is  ``CV_HOUGH_GRADIENT`` , which is basically  *21HT* , described in  [Yuen90]_.
-
-    :param dp: Inverse ratio of the accumulator resolution to the image resolution. For example, if  ``dp=1`` , the accumulator has the same resolution as the input image. If  ``dp=2`` , the accumulator has half as big width and height.
-
-    :param minDist: Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed.
-
-    :param cannyThreshold: The higher threshold of the two passed to  the :ocv:func:`gpu::Canny`  edge detector (the lower one is twice smaller).
-
-    :param votesThreshold: The accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected.
-
-    :param minRadius: Minimum circle radius.
-
-    :param maxRadius: Maximum circle radius.
-
-    :param maxCircles: Maximum number of output circles.
-
-    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
-
-.. seealso:: :ocv:func:`HoughCircles`
-
-
-
-gpu::HoughCirclesDownload
--------------------------
-Downloads results from :ocv:func:`gpu::HoughCircles` to host memory.
-
-.. ocv:function:: void gpu::HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles)
-
-    :param d_circles: Result of :ocv:func:`gpu::HoughCircles` .
-
-    :param h_circles: Output host array.
-
-.. seealso:: :ocv:func:`gpu::HoughCircles`
-
-
-
-gpu::GoodFeaturesToTrackDetector_GPU
-------------------------------------
-.. ocv:class:: gpu::GoodFeaturesToTrackDetector_GPU
-
-Class used for strong corners detection on an image. ::
-
-    class GoodFeaturesToTrackDetector_GPU
-    {
-    public:
-        explicit GoodFeaturesToTrackDetector_GPU(int maxCorners_ = 1000, double qualityLevel_ = 0.01, double minDistance_ = 0.0,
-            int blockSize_ = 3, bool useHarrisDetector_ = false, double harrisK_ = 0.04);
-
-        void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
-
-        int maxCorners;
-        double qualityLevel;
-        double minDistance;
-
-        int blockSize;
-        bool useHarrisDetector;
-        double harrisK;
-
-        void releaseMemory();
-    };
-
-The class finds the most prominent corners in the image.
-
-.. seealso:: :ocv:func:`goodFeaturesToTrack`
diff --git a/modules/gpuimgproc/doc/imgproc.rst b/modules/gpuimgproc/doc/imgproc.rst
new file mode 100644
index 000000000..cd91afecf
--- /dev/null
+++ b/modules/gpuimgproc/doc/imgproc.rst
@@ -0,0 +1,203 @@
+Image Processing
+================
+
+.. highlight:: cpp
+
+
+
+gpu::meanShiftFiltering
+---------------------------
+Performs mean-shift filtering for each point of the source image.
+
+.. ocv:function:: void gpu::meanShiftFiltering( const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria=TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream=Stream::Null() )
+
+    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
+
+    :param dst: Destination image containing the color of mapped points. It has the same size and type as  ``src`` .
+
+    :param sp: Spatial window radius.
+
+    :param sr: Color window radius.
+
+    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
+
+It maps each point of the source image into another point. As a result, you have a new color and new position of each point.
+
+
+
+gpu::meanShiftProc
+----------------------
+Performs a mean-shift procedure and stores information about processed points (their colors and positions) in two images.
+
+.. ocv:function:: void gpu::meanShiftProc( const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria=TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream=Stream::Null() )
+
+    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
+
+    :param dstr: Destination image containing the color of mapped points. The size and type is the same as  ``src`` .
+
+    :param dstsp: Destination image containing the position of mapped points. The size is the same as  ``src`` size. The type is  ``CV_16SC2`` .
+
+    :param sp: Spatial window radius.
+
+    :param sr: Color window radius.
+
+    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
+
+.. seealso:: :ocv:func:`gpu::meanShiftFiltering`
+
+
+
+gpu::meanShiftSegmentation
+------------------------------
+Performs a mean-shift segmentation of the source image and eliminates small segments.
+
+.. ocv:function:: void gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
+
+    :param src: Source image. Only  ``CV_8UC4`` images are supported for now.
+
+    :param dst: Segmented image with the same size and type as  ``src`` .
+
+    :param sp: Spatial window radius.
+
+    :param sr: Color window radius.
+
+    :param minsize: Minimum segment size. Smaller segments are merged.
+
+    :param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
+
+
+
+gpu::MatchTemplateBuf
+---------------------
+.. ocv:struct:: gpu::MatchTemplateBuf
+
+Class providing memory buffers for the :ocv:func:`gpu::matchTemplate` function. It also allows adjusting some specific parameters. ::
+
+    struct CV_EXPORTS MatchTemplateBuf
+    {
+        Size user_block_size;
+        GpuMat imagef, templf;
+        std::vector<GpuMat> images;
+        std::vector<GpuMat> image_sums;
+        std::vector<GpuMat> image_sqsums;
+    };
+
+You can use the `user_block_size` field to set a specific block size for the :ocv:func:`gpu::matchTemplate` function. If you leave its default value, `Size(0,0)`, automatic estimation of the block size is used (which is optimized for speed). By varying `user_block_size`, you can reduce memory requirements at the cost of speed.
+
+
+
+gpu::matchTemplate
+----------------------
+Computes a proximity map for a raster template and an image where the template is searched for.
+
+.. ocv:function:: void gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null())
+
+.. ocv:function:: void gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null())
+
+    :param image: Source image.  ``CV_32F`` and  ``CV_8U`` depth images (1..4 channels) are supported for now.
+
+    :param templ: Template image with the size and type the same as  ``image`` .
+
+    :param result: Map containing comparison results ( ``CV_32FC1`` ). If  ``image`` is  *W x H*  and ``templ`` is  *w x h*, then  ``result`` must be *W-w+1 x H-h+1*.
+
+    :param method: Specifies the way to compare the template with the image.
+
+    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::MatchTemplateBuf`.
+
+    :param stream: Stream for the asynchronous version.
+
+    The following methods are supported for the ``CV_8U`` depth images for now:
+
+    * ``CV_TM_SQDIFF``
+    * ``CV_TM_SQDIFF_NORMED``
+    * ``CV_TM_CCORR``
+    * ``CV_TM_CCORR_NORMED``
+    * ``CV_TM_CCOEFF``
+    * ``CV_TM_CCOEFF_NORMED``
+
+    The following methods are supported for the ``CV_32F`` images for now:
+
+    * ``CV_TM_SQDIFF``
+    * ``CV_TM_CCORR``
+
+.. seealso:: :ocv:func:`matchTemplate`
+
+
+
+gpu::Canny
+-------------------
+Finds edges in an image using the [Canny86]_ algorithm.
+
+.. ocv:function:: void gpu::Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
+
+.. ocv:function:: void gpu::Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
+
+.. ocv:function:: void gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
+
+.. ocv:function:: void gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
+
+    :param image: Single-channel 8-bit input image.
+
+    :param dx: First derivative of the image with respect to x (horizontal direction). Supports only the ``CV_32S`` type.
+
+    :param dy: First derivative of the image with respect to y (vertical direction). Supports only the ``CV_32S`` type.
+
+    :param edges: Output edge map. It has the same size and type as  ``image`` .
+
+    :param low_thresh: First threshold for the hysteresis procedure.
+
+    :param high_thresh: Second threshold for the hysteresis procedure.
+
+    :param apperture_size: Aperture size for the  :ocv:func:`Sobel`  operator.
+
+    :param L2gradient: Flag indicating whether a more accurate  :math:`L_2`  norm  :math:`=\sqrt{(dI/dx)^2 + (dI/dy)^2}`  should be used to compute the image gradient magnitude ( ``L2gradient=true`` ), or a faster default  :math:`L_1`  norm  :math:`=|dI/dx|+|dI/dy|`  is enough ( ``L2gradient=false`` ).
+
+    :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
+
+.. seealso:: :ocv:func:`Canny`
+
+
+
+gpu::bilateralFilter
+--------------------
+Performs bilateral filtering of the passed image.
+
+.. ocv:function:: void gpu::bilateralFilter( const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode=BORDER_DEFAULT, Stream& stream=Stream::Null() )
+
+    :param src: Source image. Supports only (channels != 2 && depth() != CV_8S && depth() != CV_32S && depth() != CV_64F).
+
+    :param dst: Destination image.
+
+    :param kernel_size: Kernel window size.
+
+    :param sigma_color: Filter sigma in the color space.
+
+    :param sigma_spatial:  Filter sigma in the coordinate space.
+
+    :param borderMode:  Border type. See :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso::
+
+    :ocv:func:`bilateralFilter`
+
+
+
+gpu::blendLinear
+-------------------
+Performs linear blending of two images.
+
+.. ocv:function:: void gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, GpuMat& result, Stream& stream = Stream::Null())
+
+    :param img1: First image. Supports only ``CV_8U`` and ``CV_32F`` depth.
+
+    :param img2: Second image. Must have the same size and the same type as ``img1`` .
+
+    :param weights1: Weights for first image. Must have the same size as ``img1`` . Supports only ``CV_32F`` type.
+
+    :param weights2: Weights for second image. Must have the same size as ``img2`` . Supports only ``CV_32F`` type.
+
+    :param result: Destination image.
+
+    :param stream: Stream for the asynchronous version.
diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
index 5bfaa3b5c..809fdb91b 100644
--- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
@@ -48,31 +48,13 @@
 #endif
 
 #include "opencv2/core/gpumat.hpp"
-#include "opencv2/gpufilters.hpp"
+#include "opencv2/core/base.hpp"
 #include "opencv2/imgproc.hpp"
+#include "opencv2/gpufilters.hpp"
 
 namespace cv { namespace gpu {
 
-enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
-       ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
-
-//! Composite two images using alpha opacity values contained in each image
-//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
-CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
-
-//! Does mean shift filtering on GPU.
-CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
-                                   TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
-                                   Stream& stream = Stream::Null());
-
-//! Does mean shift procedure on GPU.
-CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
-                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
-                              Stream& stream = Stream::Null());
-
-//! Does mean shift segmentation with elimination of small regions.
-CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
-                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+/////////////////////////// Color Processing ///////////////////////////
 
 //! converts image from one color space to another
 CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
@@ -107,41 +89,65 @@ CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& strea
 //! Routines for correcting image color gamma
 CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null());
 
-//! computes Harris cornerness criteria at each image pixel
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
-CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
-                             int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
+enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
+       ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
 
-//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
-CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
-    int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
+//! Composite two images using alpha opacity values contained in each image
+//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
+CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
 
-struct CV_EXPORTS MatchTemplateBuf
+////////////////////////////// Histogram ///////////////////////////////
+
+//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
+CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel);
+
+//! Calculates histogram with evenly distributed bins for single channel source.
+//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
+//! Output hist will have one row and histSize cols and CV_32SC1 type.
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
+
+//! Calculates histogram with evenly distributed bins for four-channel source.
+//! All channels of source are processed separately.
+//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
+//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
+
+//! Calculates histogram with bins determined by levels array.
+//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
+//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
+//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null());
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null());
+
+//! Calculates histogram with bins determined by levels array.
+//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
+//! All channels of source are processed separately.
+//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
+//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream = Stream::Null());
+
+//! Calculates histogram for 8u one channel image
+//! Output hist will have one row, 256 cols and CV_32SC1 type.
+CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null());
+CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+
+//! normalizes the grayscale image brightness and contrast by normalizing its histogram
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+
+class CV_EXPORTS CLAHE : public cv::CLAHE
 {
-    Size user_block_size;
-    GpuMat imagef, templf;
-    std::vector<GpuMat> images;
-    std::vector<GpuMat> image_sums;
-    std::vector<GpuMat> image_sqsums;
+public:
+    using cv::CLAHE::apply;
+    virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
 };
+CV_EXPORTS Ptr<cv::gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 
-//! computes the proximity map for the raster template and the image where the template is searched for
-CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null());
-
-//! computes the proximity map for the raster template and the image where the template is searched for
-CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null());
-
-//! performs linear blending of two images
-//! to avoid accuracy errors sum of weigths shouldn't be very close to zero
-CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
-                            GpuMat& result, Stream& stream = Stream::Null());
-
-//! Performa bilateral filtering of passsed image
-CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial,
-                                int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null());
+//////////////////////////////// Canny ////////////////////////////////
 
 struct CV_EXPORTS CannyBuf
 {
@@ -160,7 +166,7 @@ CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double
 CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
 CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
 
-//! HoughLines
+/////////////////////////// Hough Transform ////////////////////////////
 
 struct HoughLinesBuf
 {
@@ -172,13 +178,9 @@ CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float th
 CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
 CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
 
-//! HoughLinesP
-
 //! finds line segments in the black-n-white image using probabalistic Hough transform
 CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096);
 
-//! HoughCircles
-
 struct HoughCirclesBuf
 {
     GpuMat edges;
@@ -223,50 +225,21 @@ private:
     CannyBuf cannyBuf_;
 };
 
-//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
-CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel);
-//! Calculates histogram with evenly distributed bins for signle channel source.
-//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
-//! Output hist will have one row and histSize cols and CV_32SC1 type.
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
-//! Calculates histogram with evenly distributed bins for four-channel source.
-//! All channels of source are processed separately.
-//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
-//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
-//! Calculates histogram with bins determined by levels array.
-//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
-//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
-//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null());
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null());
-//! Calculates histogram with bins determined by levels array.
-//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
-//! All channels of source are processed separately.
-//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
-//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
-CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream = Stream::Null());
+////////////////////////// Corners Detection ///////////////////////////
 
-//! Calculates histogram for 8u one channel image
-//! Output hist will have one row, 256 cols and CV32SC1 type.
-CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null());
-CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+//! computes Harris cornerness criteria at each image pixel
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
+                             int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
 
-//! normalizes the grayscale image brightness and contrast by normalizing its histogram
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+//! computes the minimum eigenvalue of the 2x2 derivative covariance matrix at each pixel - the cornerness criterion
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
+    int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
 
-class CV_EXPORTS CLAHE : public cv::CLAHE
-{
-public:
-    using cv::CLAHE::apply;
-    virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
-};
-CV_EXPORTS Ptr<cv::gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+////////////////////////// Feature Detection ///////////////////////////
 
 class CV_EXPORTS GoodFeaturesToTrackDetector_GPU
 {
@@ -315,6 +288,52 @@ inline GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxC
     harrisK = harrisK_;
 }
 
+///////////////////////////// Mean Shift //////////////////////////////
+
+//! Does mean shift filtering on GPU.
+CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
+                                   TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
+                                   Stream& stream = Stream::Null());
+
+//! Does mean shift procedure on GPU.
+CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
+                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
+                              Stream& stream = Stream::Null());
+
+//! Does mean shift segmentation with elimination of small regions.
+CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
+                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+/////////////////////////// Match Template ////////////////////////////
+
+struct CV_EXPORTS MatchTemplateBuf
+{
+    Size user_block_size;
+    GpuMat imagef, templf;
+    std::vector<GpuMat> images;
+    std::vector<GpuMat> image_sums;
+    std::vector<GpuMat> image_sqsums;
+};
+
+//! computes the proximity map for the raster template and the image where the template is searched for
+CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null());
+
+//! computes the proximity map for the raster template and the image where the template is searched for
+CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null());
+
+////////////////////////// Bilateral Filter ///////////////////////////
+
+//! Performs bilateral filtering of the passed image
+CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial,
+                                int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null());
+
+///////////////////////////// Blending ////////////////////////////////
+
+//! performs linear blending of two images
+//! to avoid accuracy errors, the sum of weights shouldn't be very close to zero
+CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
+                            GpuMat& result, Stream& stream = Stream::Null());
+
 }} // namespace cv { namespace gpu {
 
 #endif /* __OPENCV_GPUIMGPROC_HPP__ */
diff --git a/modules/gpuimgproc/perf/perf_bilateral_filter.cpp b/modules/gpuimgproc/perf/perf_bilateral_filter.cpp
new file mode 100644
index 000000000..1787fdc09
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_bilateral_filter.cpp
@@ -0,0 +1,93 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// BilateralFilter
+// Runs cv::gpu::bilateralFilter on the GPU path and cv::bilateralFilter on the CPU path with the same parameters.
+DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int); // tuple: image size, depth, channel count, kernel size
+
+PERF_TEST_P(Sz_Depth_Cn_KernelSz, BilateralFilter,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_32F), // element depths
+                    GPU_CHANNELS_1_3,
+                    Values(3, 5, 9))) // kernel sizes
+{
+    declare.time(60.0); // NOTE(review): presumably a 60-second time budget for this test - confirm against the perf framework
+
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int kernel_size = GET_PARAM(3);
+    // Fixed filter settings, shared by the GPU and the CPU call below.
+    const float sigma_color = 7;
+    const float sigma_spatial = 5;
+    const int borderMode = cv::BORDER_REFLECT101;
+
+    const int type = CV_MAKE_TYPE(depth, channels); // combine depth and channel count into one matrix type
+
+    cv::Mat src(size, type);
+    declare.in(src, WARMUP_RNG); // registers src as test input; WARMUP_RNG presumably fills it with random data - confirm
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // device copy is created outside TEST_CYCLE()
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_blend.cpp b/modules/gpuimgproc/perf/perf_blend.cpp
new file mode 100644
index 000000000..5d4381768
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_blend.cpp
@@ -0,0 +1,86 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// BlendLinear
+// Runs cv::gpu::blendLinear on two random images with constant weights; the CPU path calls FAIL_NO_CPU().
+PERF_TEST_P(Sz_Depth_Cn, BlendLinear, // NOTE(review): Sz_Depth_Cn is not declared in this file; presumably it comes in via perf_precomp.hpp - confirm
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_32F), // element depths
+                    GPU_CHANNELS_1_3_4))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+
+    const int type = CV_MAKE_TYPE(depth, channels); // combine depth and channel count into one matrix type
+
+    cv::Mat img1(size, type);
+    cv::Mat img2(size, type);
+    declare.in(img1, img2, WARMUP_RNG); // registers both images as test inputs; WARMUP_RNG presumably randomizes them - confirm
+    // Both weight maps are single-channel float and hold 0.5 everywhere, so the weights sum to 1 at every pixel.
+    const cv::Mat weights1(size, CV_32FC1, cv::Scalar::all(0.5));
+    const cv::Mat weights2(size, CV_32FC1, cv::Scalar::all(0.5));
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_img1(img1); // device copies are created outside TEST_CYCLE()
+        const cv::gpu::GpuMat d_img2(img2);
+        const cv::gpu::GpuMat d_weights1(weights1);
+        const cv::gpu::GpuMat d_weights2(weights2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::blendLinear(d_img1, d_img2, d_weights1, d_weights2, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is exercised by this test
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_canny.cpp b/modules/gpuimgproc/perf/perf_canny.cpp
new file mode 100644
index 000000000..ce6db2bb3
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_canny.cpp
@@ -0,0 +1,87 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// Canny
+// Runs cv::gpu::Canny (with a CannyBuf) on the GPU path and cv::Canny on the CPU path over grayscale test images.
+DEF_PARAM_TEST(Image_AppertureSz_L2gradient, string, int, bool); // tuple: image file name, aperture size, L2-gradient flag
+
+PERF_TEST_P(Image_AppertureSz_L2gradient, Canny,
+            Combine(Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"),
+                    Values(3, 5), // aperture sizes
+                    Bool())) // useL2gradient parameter; Bool() presumably yields both false and true - confirm
+{
+    const string fileName = GET_PARAM(0);
+    const int apperture_size = GET_PARAM(1);
+    const bool useL2gradient = GET_PARAM(2);
+
+    const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE); // readImage is defined outside this file
+    ASSERT_FALSE(image.empty()); // stop the test early when the image could not be loaded
+    // Fixed thresholds, shared by the GPU and the CPU call below.
+    const double low_thresh = 50.0;
+    const double high_thresh = 100.0;
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_image(image); // device copy is created outside TEST_CYCLE()
+        cv::gpu::GpuMat dst;
+        cv::gpu::CannyBuf d_buf; // declared once and passed to every call in the cycle
+
+        TEST_CYCLE() cv::gpu::Canny(d_image, d_buf, dst, low_thresh, high_thresh, apperture_size, useL2gradient);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::Canny(image, dst, low_thresh, high_thresh, apperture_size, useL2gradient);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_color.cpp b/modules/gpuimgproc/perf/perf_color.cpp
new file mode 100644
index 000000000..1df324816
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_color.cpp
@@ -0,0 +1,252 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// CvtColor
+// Runs cv::gpu::cvtColor on the GPU path and cv::cvtColor on the CPU path for a list of conversion codes.
+DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CvtColorInfo); // tuple: image size, depth, conversion descriptor
+
+PERF_TEST_P(Sz_Depth_Code, CvtColor,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_32F), // element depths
+                    Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA), // NOTE(review): arguments are presumably (scn, dcn, code) - confirm against CvtColorInfo
+                           CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
+                           CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
+                           CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
+                           CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
+                           CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
+                           CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
+                           CvtColorInfo(3, 3, cv::COLOR_HLS2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2Lab),
+                           CvtColorInfo(3, 3, cv::COLOR_LBGR2Lab),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2Luv),
+                           CvtColorInfo(3, 3, cv::COLOR_LBGR2Luv),
+                           CvtColorInfo(3, 3, cv::COLOR_Lab2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_Lab2LBGR),
+                           CvtColorInfo(3, 3, cv::COLOR_Luv2RGB),
+                           CvtColorInfo(3, 3, cv::COLOR_Luv2LRGB))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const CvtColorInfo info = GET_PARAM(2);
+
+    cv::Mat src(size, CV_MAKETYPE(depth, info.scn)); // info.scn supplies the source channel count
+    cv::randu(src, 0, depth == CV_8U ? 255.0 : 1.0); // random fill; upper bound is 255 for CV_8U and 1 otherwise
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // device copy is created outside TEST_CYCLE()
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn);
+
+        GPU_SANITY_CHECK(dst, 1e-4); // second argument is presumably a comparison tolerance - confirm
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::cvtColor(src, dst, info.code, info.dcn);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+PERF_TEST_P(Sz_Depth_Code, CvtColorBayer, // same GPU/CPU cvtColor comparison as above, restricted to Bayer conversion codes
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U), // element depths
+                    Values(CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR), // Bayer -> BGR codes, listed with 1 and 3
+                           CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR),
+                           CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR),
+                           CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR),
+                           // Bayer -> GRAY codes, listed with 1 and 1
+                           CvtColorInfo(1, 1, cv::COLOR_BayerBG2GRAY),
+                           CvtColorInfo(1, 1, cv::COLOR_BayerGB2GRAY),
+                           CvtColorInfo(1, 1, cv::COLOR_BayerRG2GRAY),
+                           CvtColorInfo(1, 1, cv::COLOR_BayerGR2GRAY))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const CvtColorInfo info = GET_PARAM(2);
+
+    cv::Mat src(size, CV_MAKETYPE(depth, info.scn)); // info.scn supplies the source channel count
+    declare.in(src, WARMUP_RNG); // registers src as test input; WARMUP_RNG presumably fills it with random data - confirm
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // device copy is created outside TEST_CYCLE()
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::cvtColor(src, dst, info.code, info.dcn);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Demosaicing
+// Runs cv::gpu::demosaicing for every code in DemosaicingCode; the CPU path uses cv::cvtColor where a CPU code exists.
+CV_ENUM(DemosaicingCode,
+        cv::COLOR_BayerBG2BGR, cv::COLOR_BayerGB2BGR, cv::COLOR_BayerRG2BGR, cv::COLOR_BayerGR2BGR,
+        cv::COLOR_BayerBG2GRAY, cv::COLOR_BayerGB2GRAY, cv::COLOR_BayerRG2GRAY, cv::COLOR_BayerGR2GRAY,
+        cv::gpu::COLOR_BayerBG2BGR_MHT, cv::gpu::COLOR_BayerGB2BGR_MHT, cv::gpu::COLOR_BayerRG2BGR_MHT, cv::gpu::COLOR_BayerGR2BGR_MHT,
+        cv::gpu::COLOR_BayerBG2GRAY_MHT, cv::gpu::COLOR_BayerGB2GRAY_MHT, cv::gpu::COLOR_BayerRG2GRAY_MHT, cv::gpu::COLOR_BayerGR2GRAY_MHT)
+
+DEF_PARAM_TEST(Sz_Code, cv::Size, DemosaicingCode); // tuple: image size, demosaicing code
+
+PERF_TEST_P(Sz_Code, Demosaicing,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    DemosaicingCode::all())) // every enumerator listed in the CV_ENUM above
+{
+    const cv::Size size = GET_PARAM(0);
+    const int code = GET_PARAM(1);
+
+    cv::Mat src(size, CV_8UC1); // input is always 8-bit single-channel
+    declare.in(src, WARMUP_RNG); // registers src as test input; WARMUP_RNG presumably fills it with random data - confirm
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src); // device copy is created outside TEST_CYCLE()
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::demosaicing(d_src, dst, code);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        if (code >= cv::COLOR_COLORCVT_MAX) // such codes are never passed to cv::cvtColor; presumably these are the cv::gpu::*_MHT codes - confirm
+        {
+            FAIL_NO_CPU();
+        }
+        else
+        {
+            cv::Mat dst;
+
+            TEST_CYCLE() cv::cvtColor(src, dst, code);
+
+            CPU_SANITY_CHECK(dst);
+        }
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SwapChannels
+// Runs cv::gpu::swapChannels on a 4-channel 8-bit image; the CPU path calls FAIL_NO_CPU().
+PERF_TEST_P(Sz, SwapChannels, // NOTE(review): Sz is not declared in this file; presumably it comes in via perf_precomp.hpp - confirm
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam(); // single-parameter test, so GetParam() is used directly
+
+    cv::Mat src(size, CV_8UC4);
+    declare.in(src, WARMUP_RNG); // registers src as test input; WARMUP_RNG presumably fills it with random data - confirm
+
+    const int dstOrder[] = {2, 1, 0, 3}; // channel order handed to swapChannels: indices 0 and 2 exchanged, 1 and 3 kept
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat dst(src); // dst starts as a device copy of src and is the only image passed to the call
+        // NOTE(review): the call appears to work in place, so the contents checked below may depend on how many times the cycle ran - confirm
+        TEST_CYCLE() cv::gpu::swapChannels(dst, dstOrder);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is exercised by this test
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// AlphaComp
+// Runs cv::gpu::alphaComp for every AlphaOp value on 4-channel images; the CPU path calls FAIL_NO_CPU().
+CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
+
+DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, AlphaOp); // tuple: image size, matrix type, alpha operation
+
+PERF_TEST_P(Sz_Type_Op, AlphaComp,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4), // four-channel types only
+                    AlphaOp::all())) // every enumerator listed in the CV_ENUM above
+{
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int alpha_op = GET_PARAM(2);
+
+    cv::Mat img1(size, type);
+    cv::Mat img2(size, type);
+    declare.in(img1, img2, WARMUP_RNG); // registers both images as test inputs; WARMUP_RNG presumably randomizes them - confirm
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_img1(img1); // device copies are created outside TEST_CYCLE()
+        const cv::gpu::GpuMat d_img2(img2);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::alphaComp(d_img1, d_img2, dst, alpha_op);
+
+        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE); // extra arguments are presumably a tolerance and a relative-error mode - confirm
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU implementation is exercised by this test
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_corners.cpp b/modules/gpuimgproc/perf/perf_corners.cpp
new file mode 100644
index 000000000..28e8806e5
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_corners.cpp
@@ -0,0 +1,137 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// CornerHarris: runs cv::gpu::cornerHarris or cv::cornerHarris on one grayscale image for each type / border mode / blockSize / apertureSize combination.
+
+DEF_PARAM_TEST(Image_Type_Border_BlockSz_ApertureSz, string, MatType, BorderMode, int, int);
+
+PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerHarris,
+            Combine(Values<string>("gpu/stereobm/aloe-L.png"),
+                    Values(CV_8UC1, CV_32FC1),
+                    Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+                    Values(3, 5, 7),
+                    Values(0, 3, 5, 7)))
+{
+    const string fileName = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int borderMode = GET_PARAM(2);
+    const int blockSize = GET_PARAM(3);
+    const int apertureSize = GET_PARAM(4);
+
+    cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
+
+    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0); // float input is scaled by 1/255; other types are converted unscaled
+
+    const double k = 0.5; // passed as the k argument of both the GPU and the CPU call
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_img(img);
+        cv::gpu::GpuMat dst;
+        cv::gpu::GpuMat d_Dx; // d_Dx, d_Dy and d_buf are declared once and handed to every timed call
+        cv::gpu::GpuMat d_Dy;
+        cv::gpu::GpuMat d_buf;
+
+        TEST_CYCLE() cv::gpu::cornerHarris(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, k, borderMode);
+
+        GPU_SANITY_CHECK(dst, 1e-4);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderMode);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// CornerMinEigenVal: runs cv::gpu::cornerMinEigenVal or cv::cornerMinEigenVal over the same parameter grid as the CornerHarris test.
+
+PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerMinEigenVal,
+            Combine(Values<string>("gpu/stereobm/aloe-L.png"),
+                    Values(CV_8UC1, CV_32FC1),
+                    Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+                    Values(3, 5, 7),
+                    Values(0, 3, 5, 7)))
+{
+    const string fileName = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int borderMode = GET_PARAM(2);
+    const int blockSize = GET_PARAM(3);
+    const int apertureSize = GET_PARAM(4);
+
+    cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
+
+    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0); // float input is scaled by 1/255; other types are converted unscaled
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_img(img);
+        cv::gpu::GpuMat dst;
+        cv::gpu::GpuMat d_Dx; // d_Dx, d_Dy and d_buf are declared once and handed to every timed call
+        cv::gpu::GpuMat d_Dy;
+        cv::gpu::GpuMat d_buf;
+
+        TEST_CYCLE() cv::gpu::cornerMinEigenVal(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, borderMode);
+
+        GPU_SANITY_CHECK(dst, 1e-4);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderMode);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_gftt.cpp b/modules/gpuimgproc/perf/perf_gftt.cpp
new file mode 100644
index 000000000..982182d17
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_gftt.cpp
@@ -0,0 +1,86 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////
+// GoodFeaturesToTrack: runs GoodFeaturesToTrackDetector_GPU or cv::goodFeaturesToTrack on one grayscale image with minDistance 0.0 and 3.0.
+
+DEF_PARAM_TEST(Image_MinDistance, string, double);
+
+PERF_TEST_P(Image_MinDistance, GoodFeaturesToTrack,
+            Combine(Values<string>("gpu/perf/aloe.png"),
+                    Values(0.0, 3.0)))
+{
+    const string fileName = GET_PARAM(0);
+    const double minDistance = GET_PARAM(1);
+
+    const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    const int maxCorners = 8000; // maxCorners and qualityLevel are fixed and shared by the GPU and CPU branches
+    const double qualityLevel = 0.01;
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance); // constructed outside the timed cycle
+
+        const cv::gpu::GpuMat d_image(image);
+        cv::gpu::GpuMat pts;
+
+        TEST_CYCLE() d_detector(d_image, pts);
+
+        GPU_SANITY_CHECK(pts);
+    }
+    else
+    {
+        cv::Mat pts;
+
+        TEST_CYCLE() cv::goodFeaturesToTrack(image, pts, maxCorners, qualityLevel, minDistance);
+
+        CPU_SANITY_CHECK(pts);
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_histogram.cpp b/modules/gpuimgproc/perf/perf_histogram.cpp
new file mode 100644
index 000000000..51f7416f9
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_histogram.cpp
@@ -0,0 +1,221 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// HistEvenC1: single-channel histogram with 30 bins and levels 0..180; GPU branch uses cv::gpu::histEven, CPU branch uses cv::calcHist.
+
+PERF_TEST_P(Sz_Depth, HistEvenC1,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_16S)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, depth);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+        cv::gpu::GpuMat d_buf;
+
+        TEST_CYCLE() cv::gpu::histEven(d_src, dst, d_buf, 30, 0, 180);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        const int hbins = 30; // CPU parameters mirror the literals 30, 0, 180 used in the GPU call
+        const float hranges[] = {0.0f, 180.0f};
+        const int histSize[] = {hbins};
+        const float* ranges[] = {hranges};
+        const int channels[] = {0};
+
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::calcHist(&src, 1, channels, cv::Mat(), dst, 1, histSize, ranges); // NOTE(review): src may be CV_16U or CV_16S here - confirm cv::calcHist accepts those depths
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// HistEvenC4: times the array overload of cv::gpu::histEven on a 4-channel image (GPU only); the four histograms are downloaded and checked one by one.
+
+PERF_TEST_P(Sz_Depth, HistEvenC4,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_16S)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+
+    cv::Mat src(size, CV_MAKE_TYPE(depth, 4));
+    declare.in(src, WARMUP_RNG);
+
+    int histSize[] = {30, 30, 30, 30}; // one entry per channel; the same settings are used for all four
+    int lowerLevel[] = {0, 0, 0, 0};
+    int upperLevel[] = {180, 180, 180, 180};
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_hist[4];
+        cv::gpu::GpuMat d_buf;
+
+        TEST_CYCLE() cv::gpu::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
+
+        cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3; // host copies of d_hist[0..3], each registered as its own SANITY_CHECK
+        d_hist[0].download(cpu_hist0);
+        d_hist[1].download(cpu_hist1);
+        d_hist[2].download(cpu_hist2);
+        d_hist[3].download(cpu_hist3);
+        SANITY_CHECK(cpu_hist0);
+        SANITY_CHECK(cpu_hist1);
+        SANITY_CHECK(cpu_hist2);
+        SANITY_CHECK(cpu_hist3);
+    }
+    else
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// CalcHist: times cv::gpu::calcHist on a CV_8UC1 image (GPU only; the CPU branch only invokes FAIL_NO_CPU()).
+
+PERF_TEST_P(Sz, CalcHist,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src(size, CV_8UC1);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::calcHist(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// EqualizeHist: runs cv::gpu::equalizeHist (with caller-provided d_hist and d_buf) or cv::equalizeHist on a CV_8UC1 image.
+
+PERF_TEST_P(Sz, EqualizeHist,
+            GPU_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();
+
+    cv::Mat src(size, CV_8UC1);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+        cv::gpu::GpuMat d_hist; // d_hist and d_buf are declared once and handed to every timed call
+        cv::gpu::GpuMat d_buf;
+
+        TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_hist, d_buf);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::equalizeHist(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// CLAHE: runs cv::gpu::CLAHE or cv::CLAHE on a CV_8UC1 image for clip limits 0.0 and 40.0.
+
+DEF_PARAM_TEST(Sz_ClipLimit, cv::Size, double);
+
+PERF_TEST_P(Sz_ClipLimit, CLAHE,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(0.0, 40.0)))
+{
+    const cv::Size size = GET_PARAM(0);
+    const double clipLimit = GET_PARAM(1);
+
+    cv::Mat src(size, CV_8UC1);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        cv::Ptr<cv::gpu::CLAHE> clahe = cv::gpu::createCLAHE(clipLimit); // created outside the timed cycle; only apply() is measured
+        cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() clahe->apply(d_src, dst);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(clipLimit); // created outside the timed cycle; only apply() is measured
+        cv::Mat dst;
+
+        TEST_CYCLE() clahe->apply(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_hough.cpp b/modules/gpuimgproc/perf/perf_hough.cpp
new file mode 100644
index 000000000..a4aac0d02
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_hough.cpp
@@ -0,0 +1,317 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// HoughLines
+
+namespace // file-local "less than" functors; the tests below pass them to std::sort before SANITY_CHECK
+{
+    struct Vec4iComparator // lexicographic order over components 0..3 of a cv::Vec4i
+    {
+        bool operator()(const cv::Vec4i& a, const cv::Vec4i& b) const // both operands by const reference (no per-comparison copy)
+        {
+            if (a[0] != b[0]) return a[0] < b[0]; // the first differing component decides
+            else if (a[1] != b[1]) return a[1] < b[1];
+            else if (a[2] != b[2]) return a[2] < b[2];
+            else return a[3] < b[3];
+        }
+    };
+    struct Vec3fComparator // lexicographic order over components 0..2 of a cv::Vec3f
+    {
+        bool operator()(const cv::Vec3f& a, const cv::Vec3f& b) const // both operands by const reference (no per-comparison copy)
+        {
+            if (a[0] != b[0]) return a[0] < b[0]; // the first differing component decides
+            else if (a[1] != b[1]) return a[1] < b[1];
+            else return a[2] < b[2];
+        }
+    };
+    struct Vec2fComparator // lexicographic order over components 0..1 of a cv::Vec2f
+    {
+        bool operator()(const cv::Vec2f& a, const cv::Vec2f& b) const // both operands by const reference (no per-comparison copy)
+        {
+            if (a[0] != b[0]) return a[0] < b[0]; // the first differing component decides
+            else return a[1] < b[1];
+        }
+    };
+}
+
+PERF_TEST_P(Sz, HoughLines, // runs cv::gpu::HoughLines or cv::HoughLines on a synthetic image of straight lines
+            GPU_TYPICAL_MAT_SIZES)
+{
+    declare.time(30.0);
+
+    const cv::Size size = GetParam();
+
+    const float rho = 1.0f;
+    const float theta = static_cast<float>(CV_PI / 180.0); // pi / 180
+    const int threshold = 300;
+
+    cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)); // all-zero image; the six cv::line calls below draw on it with value 255
+    cv::line(src, cv::Point(0, 100), cv::Point(src.cols, 100), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(0, 200), cv::Point(src.cols, 200), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(0, 400), cv::Point(src.cols, 400), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(100, 0), cv::Point(100, src.rows), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(200, 0), cv::Point(200, src.rows), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(400, 0), cv::Point(400, src.rows), cv::Scalar::all(255), 1);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_lines;
+        cv::gpu::HoughLinesBuf d_buf;
+
+        TEST_CYCLE() cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold);
+
+        cv::Mat gpu_lines(d_lines.row(0)); // only row 0 of the GPU result is copied, sorted and checked
+        cv::Vec2f* begin = gpu_lines.ptr<cv::Vec2f>(0);
+        cv::Vec2f* end = begin + gpu_lines.cols;
+        std::sort(begin, end, Vec2fComparator()); // sorted before the check - presumably because the GPU output order is not stable; confirm
+        SANITY_CHECK(gpu_lines);
+    }
+    else
+    {
+        std::vector<cv::Vec2f> cpu_lines;
+
+        TEST_CYCLE() cv::HoughLines(src, cpu_lines, rho, theta, threshold);
+
+        SANITY_CHECK(cpu_lines);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// HoughLinesP: runs cv::gpu::HoughLinesP or cv::HoughLinesP on the Canny edge map of each of two test images.
+
+DEF_PARAM_TEST_1(Image, std::string);
+
+PERF_TEST_P(Image, HoughLinesP,
+            testing::Values("cv/shared/pic5.png", "stitching/a1.png"))
+{
+    declare.time(30.0);
+
+    const std::string fileName = getDataPath(GetParam());
+
+    const float rho = 1.0f;
+    const float theta = static_cast<float>(CV_PI / 180.0); // pi / 180
+    const int threshold = 100; // used by the CPU call only; the GPU call below is not given a threshold
+    const int minLineLenght = 50; // identifier spelling ("Lenght") kept as in the original
+    const int maxLineGap = 5;
+
+    const cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    cv::Mat mask; // Canny output; this edge map is the input of both Hough calls
+    cv::Canny(image, mask, 50, 100);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_mask(mask);
+        cv::gpu::GpuMat d_lines;
+        cv::gpu::HoughLinesBuf d_buf;
+
+        TEST_CYCLE() cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
+
+        cv::Mat gpu_lines(d_lines);
+        cv::Vec4i* begin = gpu_lines.ptr<cv::Vec4i>();
+        cv::Vec4i* end = begin + gpu_lines.cols;
+        std::sort(begin, end, Vec4iComparator()); // sorted before the check - presumably because the GPU output order is not stable; confirm
+        SANITY_CHECK(gpu_lines);
+    }
+    else
+    {
+        std::vector<cv::Vec4i> cpu_lines;
+
+        TEST_CYCLE() cv::HoughLinesP(mask, cpu_lines, rho, theta, threshold, minLineLenght, maxLineGap);
+
+        SANITY_CHECK(cpu_lines);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// HoughCircles: runs cv::gpu::HoughCircles or cv::HoughCircles (cv::HOUGH_GRADIENT) on a synthetic image with three circles, for dp = 1, 2 and 4.
+
+DEF_PARAM_TEST(Sz_Dp_MinDist, cv::Size, float, float);
+
+PERF_TEST_P(Sz_Dp_MinDist, HoughCircles,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(1.0f, 2.0f, 4.0f),
+                    Values(1.0f)))
+{
+    declare.time(30.0);
+
+    const cv::Size size = GET_PARAM(0);
+    const float dp = GET_PARAM(1);
+    const float minDist = GET_PARAM(2);
+
+    const int minRadius = 10; // the drawn radii (20 and 25) lie inside [minRadius, maxRadius]
+    const int maxRadius = 30;
+    const int cannyThreshold = 100;
+    const int votesThreshold = 15;
+
+    cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)); // all-zero image; circles are drawn with value 255 and thickness -1
+    cv::circle(src, cv::Point(100, 100), 20, cv::Scalar::all(255), -1);
+    cv::circle(src, cv::Point(200, 200), 25, cv::Scalar::all(255), -1);
+    cv::circle(src, cv::Point(200, 100), 25, cv::Scalar::all(255), -1);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat d_circles;
+        cv::gpu::HoughCirclesBuf d_buf;
+
+        TEST_CYCLE() cv::gpu::HoughCircles(d_src, d_circles, d_buf, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+
+        cv::Mat gpu_circles(d_circles);
+        cv::Vec3f* begin = gpu_circles.ptr<cv::Vec3f>(0);
+        cv::Vec3f* end = begin + gpu_circles.cols;
+        std::sort(begin, end, Vec3fComparator()); // sorted before the check - presumably because the GPU output order is not stable; confirm
+        SANITY_CHECK(gpu_circles);
+    }
+    else
+    {
+        std::vector<cv::Vec3f> cpu_circles;
+
+        TEST_CYCLE() cv::HoughCircles(src, cpu_circles, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+
+        SANITY_CHECK(cpu_circles);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// GeneralizedHough: runs cv::gpu::GeneralizedHough_GPU or cv::GeneralizedHough for each method-flag combination and image size on a generated scene.
+
+enum { GHT_POSITION = cv::GeneralizedHough::GHT_POSITION,
+       GHT_SCALE    = cv::GeneralizedHough::GHT_SCALE,
+       GHT_ROTATION = cv::GeneralizedHough::GHT_ROTATION
+     };
+
+CV_FLAGS(GHMethod, GHT_POSITION, GHT_SCALE, GHT_ROTATION);
+
+DEF_PARAM_TEST(Method_Sz, GHMethod, cv::Size);
+
+PERF_TEST_P(Method_Sz, GeneralizedHough,
+            Combine(Values(GHMethod(GHT_POSITION), GHMethod(GHT_POSITION | GHT_SCALE), GHMethod(GHT_POSITION | GHT_ROTATION), GHMethod(GHT_POSITION | GHT_SCALE | GHT_ROTATION)),
+                    GPU_TYPICAL_MAT_SIZES))
+{
+    declare.time(10);
+
+    const int method = GET_PARAM(0);
+    const cv::Size imageSize = GET_PARAM(1);
+
+    const cv::Mat templ = readImage("cv/shared/templ.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(templ.empty());
+
+    cv::Mat image(imageSize, CV_8UC1, cv::Scalar::all(0)); // all-zero scene; one unmodified template copy is placed at (50, 50)
+    templ.copyTo(image(cv::Rect(50, 50, templ.cols, templ.rows)));
+
+    cv::RNG rng(123456789); // fixed seed for the random object count, scale, transpose flag and position below
+    const int objCount = rng.uniform(5, 15);
+    for (int i = 0; i < objCount; ++i)
+    {
+        double scale = rng.uniform(0.7, 1.3);
+        bool rotate = 1 == rng.uniform(0, 2);
+
+        cv::Mat obj;
+        cv::resize(templ, obj, cv::Size(), scale, scale);
+        if (rotate)
+            obj = obj.t(); // the "rotated" variant is the transpose of the resized template
+
+        cv::Point pos;
+
+        pos.x = rng.uniform(0, image.cols - obj.cols); // upper bounds are image size minus object size
+        pos.y = rng.uniform(0, image.rows - obj.rows);
+
+        cv::Mat roi = image(cv::Rect(pos, obj.size()));
+        cv::add(roi, obj, roi); // adds the object into the scene through the ROI view
+    }
+
+    cv::Mat edges; // edges, dx and dy are computed once on the host and shared by both branches
+    cv::Canny(image, edges, 50, 100);
+
+    cv::Mat dx, dy;
+    cv::Sobel(image, dx, CV_32F, 1, 0);
+    cv::Sobel(image, dy, CV_32F, 0, 1);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_edges(edges);
+        const cv::gpu::GpuMat d_dx(dx);
+        const cv::gpu::GpuMat d_dy(dy);
+        cv::gpu::GpuMat posAndVotes;
+
+        cv::Ptr<cv::gpu::GeneralizedHough_GPU> d_hough = cv::gpu::GeneralizedHough_GPU::create(method);
+        if (method & GHT_ROTATION) // angle settings are applied only when the rotation flag is set
+        {
+            d_hough->set("maxAngle", 90.0);
+            d_hough->set("angleStep", 2.0);
+        }
+
+        d_hough->setTemplate(cv::gpu::GpuMat(templ));
+
+        TEST_CYCLE() d_hough->detect(d_edges, d_dx, d_dy, posAndVotes);
+
+        const cv::gpu::GpuMat positions(1, posAndVotes.cols, CV_32FC4, posAndVotes.data); // one-row CV_32FC4 header over posAndVotes.data; only this row is checked
+        GPU_SANITY_CHECK(positions);
+    }
+    else
+    {
+        cv::Mat positions;
+
+        cv::Ptr<cv::GeneralizedHough> hough = cv::GeneralizedHough::create(method);
+        if (method & GHT_ROTATION) // angle settings are applied only when the rotation flag is set
+        {
+            hough->set("maxAngle", 90.0);
+            hough->set("angleStep", 2.0);
+        }
+
+        hough->setTemplate(templ);
+
+        TEST_CYCLE() hough->detect(edges, dx, dy, positions);
+
+        CPU_SANITY_CHECK(positions);
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_imgproc.cpp b/modules/gpuimgproc/perf/perf_imgproc.cpp
deleted file mode 100644
index fcfafef5c..000000000
--- a/modules/gpuimgproc/perf/perf_imgproc.cpp
+++ /dev/null
@@ -1,1133 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "perf_precomp.hpp"
-
-using namespace std;
-using namespace testing;
-using namespace perf;
-
-//////////////////////////////////////////////////////////////////////
-// HistEvenC1
-
-PERF_TEST_P(Sz_Depth, HistEvenC1,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_16S)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, depth);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_buf;
-
-        TEST_CYCLE() cv::gpu::histEven(d_src, dst, d_buf, 30, 0, 180);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        const int hbins = 30;
-        const float hranges[] = {0.0f, 180.0f};
-        const int histSize[] = {hbins};
-        const float* ranges[] = {hranges};
-        const int channels[] = {0};
-
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::calcHist(&src, 1, channels, cv::Mat(), dst, 1, histSize, ranges);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// HistEvenC4
-
-PERF_TEST_P(Sz_Depth, HistEvenC4,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_16S)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-
-    cv::Mat src(size, CV_MAKE_TYPE(depth, 4));
-    declare.in(src, WARMUP_RNG);
-
-    int histSize[] = {30, 30, 30, 30};
-    int lowerLevel[] = {0, 0, 0, 0};
-    int upperLevel[] = {180, 180, 180, 180};
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_hist[4];
-        cv::gpu::GpuMat d_buf;
-
-        TEST_CYCLE() cv::gpu::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
-
-        cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3;
-        d_hist[0].download(cpu_hist0);
-        d_hist[1].download(cpu_hist1);
-        d_hist[2].download(cpu_hist2);
-        d_hist[3].download(cpu_hist3);
-        SANITY_CHECK(cpu_hist0);
-        SANITY_CHECK(cpu_hist1);
-        SANITY_CHECK(cpu_hist2);
-        SANITY_CHECK(cpu_hist3);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// CalcHist
-
-PERF_TEST_P(Sz, CalcHist,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::calcHist(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// EqualizeHist
-
-PERF_TEST_P(Sz, EqualizeHist,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_hist;
-        cv::gpu::GpuMat d_buf;
-
-        TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_hist, d_buf);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::equalizeHist(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-DEF_PARAM_TEST(Sz_ClipLimit, cv::Size, double);
-
-PERF_TEST_P(Sz_ClipLimit, CLAHE,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(0.0, 40.0)))
-{
-    const cv::Size size = GET_PARAM(0);
-    const double clipLimit = GET_PARAM(1);
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        cv::Ptr<cv::gpu::CLAHE> clahe = cv::gpu::createCLAHE(clipLimit);
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() clahe->apply(d_src, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(clipLimit);
-        cv::Mat dst;
-
-        TEST_CYCLE() clahe->apply(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// Canny
-
-DEF_PARAM_TEST(Image_AppertureSz_L2gradient, string, int, bool);
-
-PERF_TEST_P(Image_AppertureSz_L2gradient, Canny,
-            Combine(Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"),
-                    Values(3, 5),
-                    Bool()))
-{
-    const string fileName = GET_PARAM(0);
-    const int apperture_size = GET_PARAM(1);
-    const bool useL2gradient = GET_PARAM(2);
-
-    const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image.empty());
-
-    const double low_thresh = 50.0;
-    const double high_thresh = 100.0;
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat dst;
-        cv::gpu::CannyBuf d_buf;
-
-        TEST_CYCLE() cv::gpu::Canny(d_image, d_buf, dst, low_thresh, high_thresh, apperture_size, useL2gradient);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::Canny(image, dst, low_thresh, high_thresh, apperture_size, useL2gradient);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MeanShiftFiltering
-
-DEF_PARAM_TEST_1(Image, string);
-
-PERF_TEST_P(Image, MeanShiftFiltering,
-            Values<string>("gpu/meanshift/cones.png"))
-{
-    declare.time(300.0);
-
-    const cv::Mat img = readImage(GetParam());
-    ASSERT_FALSE(img.empty());
-
-    cv::Mat rgba;
-    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
-
-    const int sp = 50;
-    const int sr = 50;
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(rgba);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::meanShiftFiltering(d_src, dst, sp, sr);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::pyrMeanShiftFiltering(img, dst, sp, sr);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MeanShiftProc
-
-PERF_TEST_P(Image, MeanShiftProc,
-            Values<string>("gpu/meanshift/cones.png"))
-{
-    declare.time(300.0);
-
-    const cv::Mat img = readImage(GetParam());
-    ASSERT_FALSE(img.empty());
-
-    cv::Mat rgba;
-    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
-
-    const int sp = 50;
-    const int sr = 50;
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(rgba);
-        cv::gpu::GpuMat dstr;
-        cv::gpu::GpuMat dstsp;
-
-        TEST_CYCLE() cv::gpu::meanShiftProc(d_src, dstr, dstsp, sp, sr);
-
-        GPU_SANITY_CHECK(dstr);
-        GPU_SANITY_CHECK(dstsp);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// MeanShiftSegmentation
-
-PERF_TEST_P(Image, MeanShiftSegmentation,
-            Values<string>("gpu/meanshift/cones.png"))
-{
-    declare.time(300.0);
-
-    const cv::Mat img = readImage(GetParam());
-    ASSERT_FALSE(img.empty());
-
-    cv::Mat rgba;
-    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
-
-    const int sp = 10;
-    const int sr = 10;
-    const int minsize = 20;
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(rgba);
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::gpu::meanShiftSegmentation(d_src, dst, sp, sr, minsize);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BlendLinear
-
-PERF_TEST_P(Sz_Depth_Cn, BlendLinear,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_32F),
-                    GPU_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat img1(size, type);
-    cv::Mat img2(size, type);
-    declare.in(img1, img2, WARMUP_RNG);
-
-    const cv::Mat weights1(size, CV_32FC1, cv::Scalar::all(0.5));
-    const cv::Mat weights2(size, CV_32FC1, cv::Scalar::all(0.5));
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_img1(img1);
-        const cv::gpu::GpuMat d_img2(img2);
-        const cv::gpu::GpuMat d_weights1(weights1);
-        const cv::gpu::GpuMat d_weights2(weights2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::blendLinear(d_img1, d_img2, d_weights1, d_weights2, dst);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// MatchTemplate8U
-
-CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)
-
-DEF_PARAM_TEST(Sz_TemplateSz_Cn_Method, cv::Size, cv::Size, MatCn, TemplateMethod);
-
-PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate8U,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
-                    GPU_CHANNELS_1_3_4,
-                    TemplateMethod::all()))
-{
-    declare.time(300.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const cv::Size templ_size = GET_PARAM(1);
-    const int cn = GET_PARAM(2);
-    const int method = GET_PARAM(3);
-
-    cv::Mat image(size, CV_MAKE_TYPE(CV_8U, cn));
-    cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_8U, cn));
-    declare.in(image, templ, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_image(image);
-        const cv::gpu::GpuMat d_templ(templ);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method);
-
-        GPU_SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::matchTemplate(image, templ, dst, method);
-
-        CPU_SANITY_CHECK(dst);
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// MatchTemplate32F
-
-PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate32F,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
-                    GPU_CHANNELS_1_3_4,
-                    Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))))
-{
-    declare.time(300.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const cv::Size templ_size = GET_PARAM(1);
-    const int cn = GET_PARAM(2);
-    int method = GET_PARAM(3);
-
-    cv::Mat image(size, CV_MAKE_TYPE(CV_32F, cn));
-    cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_32F, cn));
-    declare.in(image, templ, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_image(image);
-        const cv::gpu::GpuMat d_templ(templ);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method);
-
-        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::matchTemplate(image, templ, dst, method);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// CornerHarris
-
-DEF_PARAM_TEST(Image_Type_Border_BlockSz_ApertureSz, string, MatType, BorderMode, int, int);
-
-PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerHarris,
-            Combine(Values<string>("gpu/stereobm/aloe-L.png"),
-                    Values(CV_8UC1, CV_32FC1),
-                    Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
-                    Values(3, 5, 7),
-                    Values(0, 3, 5, 7)))
-{
-    const string fileName = GET_PARAM(0);
-    const int type = GET_PARAM(1);
-    const int borderMode = GET_PARAM(2);
-    const int blockSize = GET_PARAM(3);
-    const int apertureSize = GET_PARAM(4);
-
-    cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
-
-    const double k = 0.5;
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_Dx;
-        cv::gpu::GpuMat d_Dy;
-        cv::gpu::GpuMat d_buf;
-
-        TEST_CYCLE() cv::gpu::cornerHarris(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, k, borderMode);
-
-        GPU_SANITY_CHECK(dst, 1e-4);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderMode);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// CornerMinEigenVal
-
-PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerMinEigenVal,
-            Combine(Values<string>("gpu/stereobm/aloe-L.png"),
-                    Values(CV_8UC1, CV_32FC1),
-                    Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
-                    Values(3, 5, 7),
-                    Values(0, 3, 5, 7)))
-{
-    const string fileName = GET_PARAM(0);
-    const int type = GET_PARAM(1);
-    const int borderMode = GET_PARAM(2);
-    const int blockSize = GET_PARAM(3);
-    const int apertureSize = GET_PARAM(4);
-
-    cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_Dx;
-        cv::gpu::GpuMat d_Dy;
-        cv::gpu::GpuMat d_buf;
-
-        TEST_CYCLE() cv::gpu::cornerMinEigenVal(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, borderMode);
-
-        GPU_SANITY_CHECK(dst, 1e-4);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderMode);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// CvtColor
-
-DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CvtColorInfo);
-
-PERF_TEST_P(Sz_Depth_Code, CvtColor,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_32F),
-                    Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
-                           CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
-                           CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
-                           CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
-                           CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
-                           CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
-                           CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
-                           CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
-                           CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
-                           CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
-                           CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
-                           CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
-                           CvtColorInfo(3, 3, cv::COLOR_HLS2BGR),
-                           CvtColorInfo(3, 3, cv::COLOR_BGR2Lab),
-                           CvtColorInfo(3, 3, cv::COLOR_LBGR2Lab),
-                           CvtColorInfo(3, 3, cv::COLOR_BGR2Luv),
-                           CvtColorInfo(3, 3, cv::COLOR_LBGR2Luv),
-                           CvtColorInfo(3, 3, cv::COLOR_Lab2BGR),
-                           CvtColorInfo(3, 3, cv::COLOR_Lab2LBGR),
-                           CvtColorInfo(3, 3, cv::COLOR_Luv2RGB),
-                           CvtColorInfo(3, 3, cv::COLOR_Luv2LRGB))))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const CvtColorInfo info = GET_PARAM(2);
-
-    cv::Mat src(size, CV_MAKETYPE(depth, info.scn));
-    cv::randu(src, 0, depth == CV_8U ? 255.0 : 1.0);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn);
-
-        GPU_SANITY_CHECK(dst, 1e-4);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::cvtColor(src, dst, info.code, info.dcn);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-PERF_TEST_P(Sz_Depth_Code, CvtColorBayer,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U),
-                    Values(CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR),
-                           CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR),
-                           CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR),
-                           CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR),
-
-                           CvtColorInfo(1, 1, cv::COLOR_BayerBG2GRAY),
-                           CvtColorInfo(1, 1, cv::COLOR_BayerGB2GRAY),
-                           CvtColorInfo(1, 1, cv::COLOR_BayerRG2GRAY),
-                           CvtColorInfo(1, 1, cv::COLOR_BayerGR2GRAY))))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const CvtColorInfo info = GET_PARAM(2);
-
-    cv::Mat src(size, CV_MAKETYPE(depth, info.scn));
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::cvtColor(src, dst, info.code, info.dcn);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-CV_ENUM(DemosaicingCode,
-        COLOR_BayerBG2BGR, COLOR_BayerGB2BGR, COLOR_BayerRG2BGR, COLOR_BayerGR2BGR,
-        COLOR_BayerBG2GRAY, COLOR_BayerGB2GRAY, COLOR_BayerRG2GRAY, COLOR_BayerGR2GRAY,
-        COLOR_BayerBG2BGR_MHT, COLOR_BayerGB2BGR_MHT, COLOR_BayerRG2BGR_MHT, COLOR_BayerGR2BGR_MHT,
-        COLOR_BayerBG2GRAY_MHT, COLOR_BayerGB2GRAY_MHT, COLOR_BayerRG2GRAY_MHT, COLOR_BayerGR2GRAY_MHT)
-
-DEF_PARAM_TEST(Sz_Code, cv::Size, DemosaicingCode);
-
-PERF_TEST_P(Sz_Code, Demosaicing,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    DemosaicingCode::all()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int code = GET_PARAM(1);
-
-    cv::Mat src(size, CV_8UC1);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::demosaicing(d_src, dst, code);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        if (code >= cv::COLOR_COLORCVT_MAX)
-        {
-            FAIL_NO_CPU();
-        }
-        else
-        {
-            cv::Mat dst;
-
-            TEST_CYCLE() cv::cvtColor(src, dst, code);
-
-            CPU_SANITY_CHECK(dst);
-        }
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// SwapChannels
-
-PERF_TEST_P(Sz, SwapChannels,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    cv::Mat src(size, CV_8UC4);
-    declare.in(src, WARMUP_RNG);
-
-    const int dstOrder[] = {2, 1, 0, 3};
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat dst(src);
-
-        TEST_CYCLE() cv::gpu::swapChannels(dst, dstOrder);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// AlphaComp
-
-CV_ENUM(AlphaOp, ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL, ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL)
-
-DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, AlphaOp);
-
-PERF_TEST_P(Sz_Type_Op, AlphaComp,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
-                    AlphaOp::all()))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int type = GET_PARAM(1);
-    const int alpha_op = GET_PARAM(2);
-
-    cv::Mat img1(size, type);
-    cv::Mat img2(size, type);
-    declare.in(img1, img2, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_img1(img1);
-        const cv::gpu::GpuMat d_img2(img2);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::alphaComp(d_img1, d_img2, dst, alpha_op);
-
-        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// HoughLines
-
-namespace
-{
-    struct Vec4iComparator
-    {
-        bool operator()(const cv::Vec4i& a, const cv::Vec4i b) const
-        {
-            if (a[0] != b[0]) return a[0] < b[0];
-            else if(a[1] != b[1]) return a[1] < b[1];
-            else if(a[2] != b[2]) return a[2] < b[2];
-            else return a[3] < b[3];
-        }
-    };
-    struct Vec3fComparator
-    {
-        bool operator()(const cv::Vec3f& a, const cv::Vec3f b) const
-        {
-            if(a[0] != b[0]) return a[0] < b[0];
-            else if(a[1] != b[1]) return a[1] < b[1];
-            else return a[2] < b[2];
-        }
-    };
-    struct Vec2fComparator
-    {
-        bool operator()(const cv::Vec2f& a, const cv::Vec2f b) const
-        {
-            if(a[0] != b[0]) return a[0] < b[0];
-            else return a[1] < b[1];
-        }
-    };
-}
-
-PERF_TEST_P(Sz, HoughLines,
-            GPU_TYPICAL_MAT_SIZES)
-{
-    declare.time(30.0);
-
-    const cv::Size size = GetParam();
-
-    const float rho = 1.0f;
-    const float theta = static_cast<float>(CV_PI / 180.0);
-    const int threshold = 300;
-
-    cv::Mat src(size, CV_8UC1, cv::Scalar::all(0));
-    cv::line(src, cv::Point(0, 100), cv::Point(src.cols, 100), cv::Scalar::all(255), 1);
-    cv::line(src, cv::Point(0, 200), cv::Point(src.cols, 200), cv::Scalar::all(255), 1);
-    cv::line(src, cv::Point(0, 400), cv::Point(src.cols, 400), cv::Scalar::all(255), 1);
-    cv::line(src, cv::Point(100, 0), cv::Point(100, src.rows), cv::Scalar::all(255), 1);
-    cv::line(src, cv::Point(200, 0), cv::Point(200, src.rows), cv::Scalar::all(255), 1);
-    cv::line(src, cv::Point(400, 0), cv::Point(400, src.rows), cv::Scalar::all(255), 1);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_lines;
-        cv::gpu::HoughLinesBuf d_buf;
-
-        TEST_CYCLE() cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold);
-
-        cv::Mat gpu_lines(d_lines.row(0));
-        cv::Vec2f* begin = gpu_lines.ptr<cv::Vec2f>(0);
-        cv::Vec2f* end = begin + gpu_lines.cols;
-        std::sort(begin, end, Vec2fComparator());
-        SANITY_CHECK(gpu_lines);
-    }
-    else
-    {
-        std::vector<cv::Vec2f> cpu_lines;
-
-        TEST_CYCLE() cv::HoughLines(src, cpu_lines, rho, theta, threshold);
-
-        SANITY_CHECK(cpu_lines);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// HoughLinesP
-
-DEF_PARAM_TEST_1(Image, std::string);
-
-PERF_TEST_P(Image, HoughLinesP,
-            testing::Values("cv/shared/pic5.png", "stitching/a1.png"))
-{
-    declare.time(30.0);
-
-    const std::string fileName = getDataPath(GetParam());
-
-    const float rho = 1.0f;
-    const float theta = static_cast<float>(CV_PI / 180.0);
-    const int threshold = 100;
-    const int minLineLenght = 50;
-    const int maxLineGap = 5;
-
-    const cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image.empty());
-
-    cv::Mat mask;
-    cv::Canny(image, mask, 50, 100);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_mask(mask);
-        cv::gpu::GpuMat d_lines;
-        cv::gpu::HoughLinesBuf d_buf;
-
-        TEST_CYCLE() cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
-
-        cv::Mat gpu_lines(d_lines);
-        cv::Vec4i* begin = gpu_lines.ptr<cv::Vec4i>();
-        cv::Vec4i* end = begin + gpu_lines.cols;
-        std::sort(begin, end, Vec4iComparator());
-        SANITY_CHECK(gpu_lines);
-    }
-    else
-    {
-        std::vector<cv::Vec4i> cpu_lines;
-
-        TEST_CYCLE() cv::HoughLinesP(mask, cpu_lines, rho, theta, threshold, minLineLenght, maxLineGap);
-
-        SANITY_CHECK(cpu_lines);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// HoughCircles
-
-DEF_PARAM_TEST(Sz_Dp_MinDist, cv::Size, float, float);
-
-PERF_TEST_P(Sz_Dp_MinDist, HoughCircles,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(1.0f, 2.0f, 4.0f),
-                    Values(1.0f)))
-{
-    declare.time(30.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const float dp = GET_PARAM(1);
-    const float minDist = GET_PARAM(2);
-
-    const int minRadius = 10;
-    const int maxRadius = 30;
-    const int cannyThreshold = 100;
-    const int votesThreshold = 15;
-
-    cv::Mat src(size, CV_8UC1, cv::Scalar::all(0));
-    cv::circle(src, cv::Point(100, 100), 20, cv::Scalar::all(255), -1);
-    cv::circle(src, cv::Point(200, 200), 25, cv::Scalar::all(255), -1);
-    cv::circle(src, cv::Point(200, 100), 25, cv::Scalar::all(255), -1);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_circles;
-        cv::gpu::HoughCirclesBuf d_buf;
-
-        TEST_CYCLE() cv::gpu::HoughCircles(d_src, d_circles, d_buf, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
-
-        cv::Mat gpu_circles(d_circles);
-        cv::Vec3f* begin = gpu_circles.ptr<cv::Vec3f>(0);
-        cv::Vec3f* end = begin + gpu_circles.cols;
-        std::sort(begin, end, Vec3fComparator());
-        SANITY_CHECK(gpu_circles);
-    }
-    else
-    {
-        std::vector<cv::Vec3f> cpu_circles;
-
-        TEST_CYCLE() cv::HoughCircles(src, cpu_circles, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
-
-        SANITY_CHECK(cpu_circles);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// GeneralizedHough
-
-enum { GHT_POSITION = cv::GeneralizedHough::GHT_POSITION,
-       GHT_SCALE    = cv::GeneralizedHough::GHT_SCALE,
-       GHT_ROTATION = cv::GeneralizedHough::GHT_ROTATION
-     };
-
-CV_FLAGS(GHMethod, GHT_POSITION, GHT_SCALE, GHT_ROTATION);
-
-DEF_PARAM_TEST(Method_Sz, GHMethod, cv::Size);
-
-PERF_TEST_P(Method_Sz, GeneralizedHough,
-            Combine(Values(GHMethod(GHT_POSITION), GHMethod(GHT_POSITION | GHT_SCALE), GHMethod(GHT_POSITION | GHT_ROTATION), GHMethod(GHT_POSITION | GHT_SCALE | GHT_ROTATION)),
-                    GPU_TYPICAL_MAT_SIZES))
-{
-    declare.time(10);
-
-    const int method = GET_PARAM(0);
-    const cv::Size imageSize = GET_PARAM(1);
-
-    const cv::Mat templ = readImage("cv/shared/templ.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(templ.empty());
-
-    cv::Mat image(imageSize, CV_8UC1, cv::Scalar::all(0));
-    templ.copyTo(image(cv::Rect(50, 50, templ.cols, templ.rows)));
-
-    cv::RNG rng(123456789);
-    const int objCount = rng.uniform(5, 15);
-    for (int i = 0; i < objCount; ++i)
-    {
-        double scale = rng.uniform(0.7, 1.3);
-        bool rotate = 1 == rng.uniform(0, 2);
-
-        cv::Mat obj;
-        cv::resize(templ, obj, cv::Size(), scale, scale);
-        if (rotate)
-            obj = obj.t();
-
-        cv::Point pos;
-
-        pos.x = rng.uniform(0, image.cols - obj.cols);
-        pos.y = rng.uniform(0, image.rows - obj.rows);
-
-        cv::Mat roi = image(cv::Rect(pos, obj.size()));
-        cv::add(roi, obj, roi);
-    }
-
-    cv::Mat edges;
-    cv::Canny(image, edges, 50, 100);
-
-    cv::Mat dx, dy;
-    cv::Sobel(image, dx, CV_32F, 1, 0);
-    cv::Sobel(image, dy, CV_32F, 0, 1);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_edges(edges);
-        const cv::gpu::GpuMat d_dx(dx);
-        const cv::gpu::GpuMat d_dy(dy);
-        cv::gpu::GpuMat posAndVotes;
-
-        cv::Ptr<cv::gpu::GeneralizedHough_GPU> d_hough = cv::gpu::GeneralizedHough_GPU::create(method);
-        if (method & GHT_ROTATION)
-        {
-            d_hough->set("maxAngle", 90.0);
-            d_hough->set("angleStep", 2.0);
-        }
-
-        d_hough->setTemplate(cv::gpu::GpuMat(templ));
-
-        TEST_CYCLE() d_hough->detect(d_edges, d_dx, d_dy, posAndVotes);
-
-        const cv::gpu::GpuMat positions(1, posAndVotes.cols, CV_32FC4, posAndVotes.data);
-        GPU_SANITY_CHECK(positions);
-    }
-    else
-    {
-        cv::Mat positions;
-
-        cv::Ptr<cv::GeneralizedHough> hough = cv::GeneralizedHough::create(method);
-        if (method & GHT_ROTATION)
-        {
-            hough->set("maxAngle", 90.0);
-            hough->set("angleStep", 2.0);
-        }
-
-        hough->setTemplate(templ);
-
-        TEST_CYCLE() hough->detect(edges, dx, dy, positions);
-
-        CPU_SANITY_CHECK(positions);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BilateralFilter
-
-DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int);
-
-PERF_TEST_P(Sz_Depth_Cn_KernelSz, BilateralFilter,
-            Combine(GPU_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_32F),
-                    GPU_CHANNELS_1_3,
-                    Values(3, 5, 9)))
-{
-    declare.time(60.0);
-
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-    const int kernel_size = GET_PARAM(3);
-
-    const float sigma_color = 7;
-    const float sigma_spatial = 5;
-    const int borderMode = cv::BORDER_REFLECT101;
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    if (PERF_RUN_GPU())
-    {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-
-        TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-
-        GPU_SANITY_CHECK(dst);
-    }
-    else
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////
-// GoodFeaturesToTrack
-
-DEF_PARAM_TEST(Image_MinDistance, string, double);
-
-PERF_TEST_P(Image_MinDistance, GoodFeaturesToTrack,
-            Combine(Values<string>("gpu/perf/aloe.png"),
-                    Values(0.0, 3.0)))
-{
-    const string fileName = GET_PARAM(0);
-    const double minDistance = GET_PARAM(1);
-
-    const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image.empty());
-
-    const int maxCorners = 8000;
-    const double qualityLevel = 0.01;
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance);
-
-        const cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat pts;
-
-        TEST_CYCLE() d_detector(d_image, pts);
-
-        GPU_SANITY_CHECK(pts);
-    }
-    else
-    {
-        cv::Mat pts;
-
-        TEST_CYCLE() cv::goodFeaturesToTrack(image, pts, maxCorners, qualityLevel, minDistance);
-
-        CPU_SANITY_CHECK(pts);
-    }
-}
diff --git a/modules/gpuimgproc/perf/perf_match_template.cpp b/modules/gpuimgproc/perf/perf_match_template.cpp
new file mode 100644
index 000000000..f3af14914
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_match_template.cpp
@@ -0,0 +1,131 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate8U
+
+CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)
+
+DEF_PARAM_TEST(Sz_TemplateSz_Cn_Method, cv::Size, cv::Size, MatCn, TemplateMethod);
+
+PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate8U,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
+                    GPU_CHANNELS_1_3_4,
+                    TemplateMethod::all()))
+{
+    declare.time(300.0);
+    // Test parameters: image size, template size, channel count, comparison method.
+    const cv::Size size = GET_PARAM(0);
+    const cv::Size templ_size = GET_PARAM(1);
+    const int cn = GET_PARAM(2);
+    const int method = GET_PARAM(3);
+
+    cv::Mat image(size, CV_MAKE_TYPE(CV_8U, cn));
+    cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_8U, cn));
+    declare.in(image, templ, WARMUP_RNG);
+    // GPU runs time cv::gpu::matchTemplate; otherwise the CPU cv::matchTemplate is timed.
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_image(image);
+        const cv::gpu::GpuMat d_templ(templ);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method);
+
+        GPU_SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::matchTemplate(image, templ, dst, method);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate32F
+
+PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate32F,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
+                    GPU_CHANNELS_1_3_4,
+                    Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))))
+{
+    declare.time(300.0);
+    // Test parameters: image size, template size, channel count, comparison method.
+    const cv::Size size = GET_PARAM(0);
+    const cv::Size templ_size = GET_PARAM(1);
+    const int cn = GET_PARAM(2);
+    const int method = GET_PARAM(3);
+
+    cv::Mat image(size, CV_MAKE_TYPE(CV_32F, cn));
+    cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_32F, cn));
+    declare.in(image, templ, WARMUP_RNG);
+    // Only TM_SQDIFF and TM_CCORR are exercised for float input (see the Values list above).
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_image(image);
+        const cv::gpu::GpuMat d_templ(templ);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method);
+
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::matchTemplate(image, templ, dst, method);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
diff --git a/modules/gpuimgproc/perf/perf_mean_shift.cpp b/modules/gpuimgproc/perf/perf_mean_shift.cpp
new file mode 100644
index 000000000..0ac0b71c4
--- /dev/null
+++ b/modules/gpuimgproc/perf/perf_mean_shift.cpp
@@ -0,0 +1,152 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace testing;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// MeanShiftFiltering
+
+DEF_PARAM_TEST_1(Image, string);
+
+PERF_TEST_P(Image, MeanShiftFiltering,
+            Values<string>("gpu/meanshift/cones.png"))
+{
+    declare.time(300.0);
+
+    const cv::Mat img = readImage(GetParam());
+    ASSERT_FALSE(img.empty());
+    // The GPU path is fed a BGRA copy of the loaded image.
+    cv::Mat rgba;
+    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+
+    const int sp = 50;
+    const int sr = 50;
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(rgba);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::meanShiftFiltering(d_src, dst, sp, sr);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        cv::Mat dst;
+        // NOTE: the CPU reference runs on the original img, not on the BGRA copy.
+        TEST_CYCLE() cv::pyrMeanShiftFiltering(img, dst, sp, sr);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MeanShiftProc
+
+PERF_TEST_P(Image, MeanShiftProc,
+            Values<string>("gpu/meanshift/cones.png"))
+{
+    declare.time(300.0);
+
+    const cv::Mat img = readImage(GetParam());
+    ASSERT_FALSE(img.empty());
+    // The GPU path is fed a BGRA copy of the loaded image.
+    cv::Mat rgba;
+    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+
+    const int sp = 50;
+    const int sr = 50;
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(rgba);
+        cv::gpu::GpuMat dstr;
+        cv::gpu::GpuMat dstsp;
+        // meanShiftProc fills two outputs; both are sanity-checked below.
+        TEST_CYCLE() cv::gpu::meanShiftProc(d_src, dstr, dstsp, sp, sr);
+
+        GPU_SANITY_CHECK(dstr);
+        GPU_SANITY_CHECK(dstsp);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU counterpart is benchmarked for this test
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MeanShiftSegmentation
+
+PERF_TEST_P(Image, MeanShiftSegmentation,
+            Values<string>("gpu/meanshift/cones.png"))
+{
+    declare.time(300.0);
+
+    const cv::Mat img = readImage(GetParam());
+    ASSERT_FALSE(img.empty());
+    // The GPU path is fed a BGRA copy of the loaded image.
+    cv::Mat rgba;
+    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+
+    const int sp = 10;
+    const int sr = 10;
+    const int minsize = 20;
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(rgba);
+        cv::Mat dst; // host-side Mat: meanShiftSegmentation is handed a cv::Mat here, not a GpuMat
+
+        TEST_CYCLE() cv::gpu::meanShiftSegmentation(d_src, dst, sp, sr, minsize);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU(); // no CPU counterpart is benchmarked for this test
+    }
+}
diff --git a/modules/gpuimgproc/src/blend.cpp b/modules/gpuimgproc/src/blend.cpp
index 3fd650781..e92e37945 100644
--- a/modules/gpuimgproc/src/blend.cpp
+++ b/modules/gpuimgproc/src/blend.cpp
@@ -51,6 +51,9 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu
 
 #else
 
+////////////////////////////////////////////////////////////////////////
+// blendLinear
+
 namespace cv { namespace gpu { namespace cudev
 {
     namespace blend
diff --git a/modules/gpuimgproc/src/canny.cpp b/modules/gpuimgproc/src/canny.cpp
new file mode 100644
index 000000000..8d361fe50
--- /dev/null
+++ b/modules/gpuimgproc/src/canny.cpp
@@ -0,0 +1,186 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_no_cuda(); }
+void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_no_cuda(); }
+void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_no_cuda(); }
+void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_no_cuda(); }
+void cv::gpu::CannyBuf::create(const Size&, int) { throw_no_cuda(); }
+void cv::gpu::CannyBuf::release() { throw_no_cuda(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)
+{
+    if (apperture_size > 0) // non-positive size: no dx/dy or filters (Canny(dx, dy, ...) passes -1)
+    {
+        ensureSizeIsEnough(image_size, CV_32SC1, dx);
+        ensureSizeIsEnough(image_size, CV_32SC1, dy);
+        // Derivative filters are only built for apertures other than 3; Canny() handles 3 via calcMagnitude on the source.
+        if (apperture_size != 3)
+        {
+            filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
+            filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
+        }
+    }
+    // The remaining buffers are sized on every call, whatever the aperture.
+    ensureSizeIsEnough(image_size, CV_32FC1, mag);
+    ensureSizeIsEnough(image_size, CV_32SC1, map);
+
+    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st1);
+    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st2);
+}
+
+void cv::gpu::CannyBuf::release()
+{   // Releases the six GpuMat buffers; filterDX/filterDY are not touched here.
+    dx.release();
+    dy.release();
+    mag.release();
+    map.release();
+    st1.release();
+    st2.release();
+}
+
+namespace canny
+{
+    void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
+    void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
+
+    void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh);
+
+    void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1);
+
+    void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2);
+
+    void getEdges(PtrStepSzi map, PtrStepSzb dst);
+}
+
+namespace
+{
+    void CannyCaller(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
+    {
+        using namespace canny;
+        // Shared tail of the Canny overloads; buf.mag must already be filled by the caller (calcMagnitude).
+        buf.map.setTo(Scalar::all(0));
+        calcMap(dx, dy, buf.mag, buf.map, low_thresh, high_thresh);
+        // Hysteresis runs in two passes over buf.map: local first, then global, using st1/st2 as scratch.
+        edgesHysteresisLocal(buf.map, buf.st1.ptr<ushort2>());
+
+        edgesHysteresisGlobal(buf.map, buf.st1.ptr<ushort2>(), buf.st2.ptr<ushort2>());
+        // Convert the final map into the output edge image.
+        getEdges(buf.map, dst);
+    }
+}
+
+void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
+{
+    CannyBuf buf; // scratch buffers local to this call; forwarded to the buffer-taking overload
+    Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
+}
+
+void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
+{
+    using namespace canny;
+    // Edge detection on an 8-bit single-channel image; buf supplies reusable scratch storage, dst is (re)created as CV_8U.
+    CV_Assert(src.type() == CV_8UC1);
+    CV_Assert(apperture_size > 0); // CannyBuf::create() sets up dx/dy and the filters only for positive sizes
+    if (!deviceSupports(SHARED_ATOMICS))
+        CV_Error(cv::Error::StsNotImplemented, "The device doesn't support shared atomics");
+
+    if( low_thresh > high_thresh )
+        std::swap( low_thresh, high_thresh);
+
+    dst.create(src.size(), CV_8U);
+    buf.create(src.size(), apperture_size);
+    // Aperture 3 works on the whole parent image plus the ROI offset; other sizes go through the derivative filters.
+    if (apperture_size == 3)
+    {
+        Size wholeSize;
+        Point ofs;
+        src.locateROI(wholeSize, ofs);
+        GpuMat srcWhole(wholeSize, src.type(), src.datastart, src.step);
+
+        calcMagnitude(srcWhole, ofs.x, ofs.y, buf.dx, buf.dy, buf.mag, L2gradient);
+    }
+    else
+    {
+        buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));
+        buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));
+
+        calcMagnitude(buf.dx, buf.dy, buf.mag, L2gradient);
+    }
+
+    CannyCaller(buf.dx, buf.dy, buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
+}
+
+void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
+{
+    CannyBuf buf; // scratch buffers local to this call; forwarded to the buffer-taking overload
+    Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
+}
+
+void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
+{
+    using namespace canny;
+    // NOTE(review): the image overload reports missing shared atomics via CV_Error(StsNotImplemented); here it is an assertion.
+    CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
+    CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
+
+    if( low_thresh > high_thresh )
+        std::swap( low_thresh, high_thresh);
+
+    dst.create(dx.size(), CV_8U);
+    buf.create(dx.size(), -1); // -1: gradients come from the caller, so dx/dy and filter setup are skipped
+
+    calcMagnitude(dx, dy, buf.mag, L2gradient);
+
+    CannyCaller(dx, dy, buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/color.cpp b/modules/gpuimgproc/src/color.cpp
index dc3582348..1b14d40c8 100644
--- a/modules/gpuimgproc/src/color.cpp
+++ b/modules/gpuimgproc/src/color.cpp
@@ -48,10 +48,16 @@ using namespace cv::gpu;
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
 void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::demosaicing(const GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::swapChannels(GpuMat&, const int[], Stream&) { throw_no_cuda(); }
+
 void cv::gpu::gammaCorrection(const GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
 
+void cv::gpu::alphaComp(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
+
+
 #else /* !defined (HAVE_CUDA) */
 
 #include "cvt_color_internal.h"
@@ -1581,7 +1587,7 @@ namespace
         (void)src;
         (void)dst;
         (void)st;
-        CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
+        CV_Error( cv::Error::StsBadFlag, "Unknown/unsupported color conversion code" );
     #else
         CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4);
 
@@ -1676,6 +1682,9 @@ namespace
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+// cvtColor
+
 void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream);
@@ -1859,6 +1868,9 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
     func(src, dst, dcn, stream);
 }
 
+////////////////////////////////////////////////////////////////////////
+// demosaicing
+
 void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
 {
     const int depth = src.depth();
@@ -1927,6 +1939,9 @@ void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Str
     }
 }
 
+////////////////////////////////////////////////////////////////////////
+// swapChannels
+
 void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
 {
     CV_Assert(image.type() == CV_8UC4);
@@ -1945,6 +1960,9 @@ void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
         cudaSafeCall( cudaDeviceSynchronize() );
 }
 
+////////////////////////////////////////////////////////////////////////
+// gammaCorrection
+
 void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stream& stream)
 {
 #if (CUDA_VERSION < 5000)
@@ -1986,4 +2004,77 @@ void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stre
 #endif
 }
 
+////////////////////////////////////////////////////////////////////////
+// alphaComp
+
+namespace
+{
+    template <int DEPTH> struct NppAlphaCompFunc
+    {
+        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_t;
+        // Function-pointer type of the NPP alpha-composition routine for this depth.
+        typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pSrc2, int nSrc2Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, NppiAlphaOp eAlphaOp);
+    };
+    // Binds one NPP routine (func) to a GpuMat-based call() so it can be stored in a dispatch table.
+    template <int DEPTH, typename NppAlphaCompFunc<DEPTH>::func_t func> struct NppAlphaComp
+    {
+        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_t;
+        // Runs func over the full extent of img1; img2 and dst are expected to have the same size.
+        static void call(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream)
+        {
+            NppStreamHandler h(stream);
+
+            NppiSize oSizeROI;
+            oSizeROI.width = img1.cols;
+            oSizeROI.height = img1.rows;
+
+            nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
+                              dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );
+            // With the null stream the call is made blocking.
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+}
+
+void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream)
+{
+    static const NppiAlphaOp npp_alpha_ops[] = {   // indexed by alpha_op
+        NPPI_OP_ALPHA_OVER,
+        NPPI_OP_ALPHA_IN,
+        NPPI_OP_ALPHA_OUT,
+        NPPI_OP_ALPHA_ATOP,
+        NPPI_OP_ALPHA_XOR,
+        NPPI_OP_ALPHA_PLUS,
+        NPPI_OP_ALPHA_OVER_PREMUL,
+        NPPI_OP_ALPHA_IN_PREMUL,
+        NPPI_OP_ALPHA_OUT_PREMUL,
+        NPPI_OP_ALPHA_ATOP_PREMUL,
+        NPPI_OP_ALPHA_XOR_PREMUL,
+        NPPI_OP_ALPHA_PLUS_PREMUL,
+        NPPI_OP_ALPHA_PREMUL
+    };
+    // Per-depth dispatch table, indexed by img1.depth(); 0 marks depths with no entry (rejected by the type assertion below).
+    typedef void (*func_t)(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream);
+
+    static const func_t funcs[] =
+    {
+        NppAlphaComp<CV_8U, nppiAlphaComp_8u_AC4R>::call,
+        0,
+        NppAlphaComp<CV_16U, nppiAlphaComp_16u_AC4R>::call,
+        0,
+        NppAlphaComp<CV_32S, nppiAlphaComp_32s_AC4R>::call,
+        NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call
+    };
+    // Inputs must be 4-channel, of a supported depth, and identical in size and type; alpha_op must index the table above.
+    CV_Assert( img1.type() == CV_8UC4 || img1.type() == CV_16UC4 || img1.type() == CV_32SC4 || img1.type() == CV_32FC4 );
+    CV_Assert( img1.size() == img2.size() && img1.type() == img2.type() );
+    CV_Assert( alpha_op >= 0 && alpha_op < static_cast<int>(sizeof(npp_alpha_ops) / sizeof(npp_alpha_ops[0])) );
+    dst.create(img1.size(), img1.type());
+
+    const func_t func = funcs[img1.depth()];
+
+    func(img1, img2, dst, npp_alpha_ops[alpha_op], StreamAccessor::getStream(stream));
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/corners.cpp b/modules/gpuimgproc/src/corners.cpp
new file mode 100644
index 000000000..44dc1505d
--- /dev/null
+++ b/modules/gpuimgproc/src/corners.cpp
@@ -0,0 +1,149 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_no_cuda(); }
+void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int) { throw_no_cuda(); }
+void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int, Stream&) { throw_no_cuda(); }
+
+void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_no_cuda(); }
+void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int) { throw_no_cuda(); }
+void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        void cornerHarris_gpu(int block_size, float k, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream);
+        void cornerMinEigenVal_gpu(int block_size, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream);
+    }
+}}}
+
+namespace
+{
+    void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
+    {
+        double scale = static_cast<double>(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
+        // Normalisation factor so far: 2^(ksize-1) * blockSize (3 stands in for a non-positive ksize); doubled for ksize < 0, times 255 for 8-bit input.
+        if (ksize < 0)
+            scale *= 2.;
+
+        if (src.depth() == CV_8U)
+            scale *= 255.;
+        // The derivative filters below receive the reciprocal of the accumulated factor.
+        scale = 1./scale;
+
+        Dx.create(src.size(), CV_32F);
+        Dy.create(src.size(), CV_32F);
+        // Positive ksize: Sobel with that kernel size; otherwise Scharr. Dx is the x-derivative, Dy the y-derivative.
+        if (ksize > 0)
+        {
+            Sobel(src, Dx, CV_32F, 1, 0, buf, ksize, scale, borderType, -1, stream);
+            Sobel(src, Dy, CV_32F, 0, 1, buf, ksize, scale, borderType, -1, stream);
+        }
+        else
+        {
+            Scharr(src, Dx, CV_32F, 1, 0, buf, scale, borderType, -1, stream);
+            Scharr(src, Dy, CV_32F, 0, 1, buf, scale, borderType, -1, stream);
+        }
+    }
+}
+
+void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)
+{
+    GpuMat Dx, Dy; // derivative images local to this call; forwarded to the overload that takes them
+    cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k, borderType);
+}
+
+void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType)
+{
+    GpuMat buf; // filter scratch buffer local to this call
+    cornerHarris(src, dst, Dx, Dy, buf, blockSize, ksize, k, borderType); // no stream passed — presumably defaulted in the header; confirm
+}
+
+void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream)
+{
+    using namespace cv::gpu::cudev::imgproc;
+    // Only REFLECT101, REPLICATE and REFLECT border modes are accepted.
+    CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
+    // Fill Dx/Dy with scaled derivatives, then compute the Harris response into a CV_32F dst on the given stream.
+    extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
+
+    dst.create(src.size(), CV_32F);
+
+    cornerHarris_gpu(blockSize, static_cast<float>(k), Dx, Dy, dst, borderType, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
+{
+    GpuMat Dx, Dy; // derivative images local to this call; forwarded to the overload that takes them
+    cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize, borderType);
+}
+
+void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)
+{
+    GpuMat buf; // filter scratch buffer local to this call
+    cornerMinEigenVal(src, dst, Dx, Dy, buf, blockSize, ksize, borderType); // no stream passed — presumably defaulted in the header; confirm
+}
+
+void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
+{
+    using namespace ::cv::gpu::cudev::imgproc;
+    // Only REFLECT101, REPLICATE and REFLECT border modes are accepted.
+    CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
+    // Fill Dx/Dy with scaled derivatives, then compute the minimal-eigenvalue response into a CV_32F dst on the given stream.
+    extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
+
+    dst.create(src.size(), CV_32F);
+
+    cornerMinEigenVal_gpu(blockSize, Dx, Dy, dst, borderType, StreamAccessor::getStream(stream));
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/cuda/imgproc.cu b/modules/gpuimgproc/src/cuda/corners.cu
similarity index 65%
rename from modules/gpuimgproc/src/cuda/imgproc.cu
rename to modules/gpuimgproc/src/cuda/corners.cu
index 3f39a43eb..39e7cdc5d 100644
--- a/modules/gpuimgproc/src/cuda/imgproc.cu
+++ b/modules/gpuimgproc/src/cuda/corners.cu
@@ -52,137 +52,6 @@ namespace cv { namespace gpu { namespace cudev
 {
     namespace imgproc
     {
-        /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
-
-        texture<uchar4, 2> tex_meanshift;
-
-        __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
-                                        size_t out_step, int cols, int rows,
-                                        int sp, int sr, int maxIter, float eps)
-        {
-            int isr2 = sr*sr;
-            uchar4 c = tex2D(tex_meanshift, x0, y0 );
-
-            // iterate meanshift procedure
-            for( int iter = 0; iter < maxIter; iter++ )
-            {
-                int count = 0;
-                int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
-                float icount;
-
-                //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
-                int minx = x0-sp;
-                int miny = y0-sp;
-                int maxx = x0+sp;
-                int maxy = y0+sp;
-
-                for( int y = miny; y <= maxy; y++)
-                {
-                    int rowCount = 0;
-                    for( int x = minx; x <= maxx; x++ )
-                    {
-                        uchar4 t = tex2D( tex_meanshift, x, y );
-
-                        int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z);
-                        if( norm2 <= isr2 )
-                        {
-                            s0 += t.x; s1 += t.y; s2 += t.z;
-                            sx += x; rowCount++;
-                        }
-                    }
-                    count += rowCount;
-                    sy += y*rowCount;
-                }
-
-                if( count == 0 )
-                    break;
-
-                icount = 1.f/count;
-                int x1 = __float2int_rz(sx*icount);
-                int y1 = __float2int_rz(sy*icount);
-                s0 = __float2int_rz(s0*icount);
-                s1 = __float2int_rz(s1*icount);
-                s2 = __float2int_rz(s2*icount);
-
-                int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);
-
-                bool stopFlag = (x0 == x1 && y0 == y1) || (::abs(x1-x0) + ::abs(y1-y0) + norm2 <= eps);
-
-                x0 = x1; y0 = y1;
-                c.x = s0; c.y = s1; c.z = s2;
-
-                if( stopFlag )
-                    break;
-            }
-
-            int base = (blockIdx.y * blockDim.y + threadIdx.y) * out_step + (blockIdx.x * blockDim.x + threadIdx.x) * 4 * sizeof(uchar);
-            *(uchar4*)(out + base) = c;
-
-            return make_short2((short)x0, (short)y0);
-        }
-
-        __global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
-        {
-            int x0 = blockIdx.x * blockDim.x + threadIdx.x;
-            int y0 = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if( x0 < cols && y0 < rows )
-                do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps);
-        }
-
-        __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep,
-                                             unsigned char* outsp, size_t outspstep,
-                                             int cols, int rows,
-                                             int sp, int sr, int maxIter, float eps)
-        {
-            int x0 = blockIdx.x * blockDim.x + threadIdx.x;
-            int y0 = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if( x0 < cols && y0 < rows )
-            {
-                int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
-                *(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
-            }
-        }
-
-        void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
-        {
-            dim3 grid(1, 1, 1);
-            dim3 threads(32, 8, 1);
-            grid.x = divUp(src.cols, threads.x);
-            grid.y = divUp(src.rows, threads.y);
-
-            cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
-            cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
-
-            meanshift_kernel<<< grid, threads, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-
-            //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
-        }
-
-        void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
-        {
-            dim3 grid(1, 1, 1);
-            dim3 threads(32, 8, 1);
-            grid.x = divUp(src.cols, threads.x);
-            grid.y = divUp(src.rows, threads.y);
-
-            cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
-            cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
-
-            meanshiftproc_kernel<<< grid, threads, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-
-            //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
-        }
-
         /////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
 
         texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
@@ -399,8 +268,7 @@ namespace cv { namespace gpu { namespace cudev
             if (stream == 0)
                 cudaSafeCall(cudaDeviceSynchronize());
         }
-    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cudev {
+    }
+}}}
 
-
-#endif /* CUDA_DISABLER */
+#endif
diff --git a/modules/gpuimgproc/src/cuda/mean_shift.cu b/modules/gpuimgproc/src/cuda/mean_shift.cu
new file mode 100644
index 000000000..aa82f295e
--- /dev/null
+++ b/modules/gpuimgproc/src/cuda/mean_shift.cu
@@ -0,0 +1,182 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/border_interpolate.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        texture<uchar4, 2> tex_meanshift;
+
+        // Runs mean shift for one pixel: starting from (x0, y0) and its texel color, repeatedly moves to the mean position and
+        // color of the texels in the (2*sp+1)x(2*sp+1) window whose squared color distance to the current color is <= sr*sr.
+        // Stops after maxIter passes, when no texel qualifies, or when the position is unchanged / shift plus color change <= eps.
+        // Writes the final color to this thread's pixel in out (row pitch out_step bytes); returns the final position. cols/rows are unused.
+        __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out, size_t out_step,
+                                        int cols, int rows, int sp, int sr, int maxIter, float eps)
+        {
+            int isr2 = sr*sr;
+            uchar4 c = tex2D(tex_meanshift, x0, y0 );
+
+            // iterate meanshift procedure
+            for( int iter = 0; iter < maxIter; iter++ )
+            {
+                int count = 0;
+                int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
+                float icount;
+
+                //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
+                int minx = x0-sp;
+                int miny = y0-sp;
+                int maxx = x0+sp;
+                int maxy = y0+sp;
+
+                for( int y = miny; y <= maxy; y++)
+                {
+                    int rowCount = 0;
+                    for( int x = minx; x <= maxx; x++ )
+                    {
+                        uchar4 t = tex2D( tex_meanshift, x, y );
+                        int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z);
+                        if( norm2 <= isr2 )
+                        {
+                            s0 += t.x; s1 += t.y; s2 += t.z;
+                            sx += x; rowCount++;
+                        }
+                    }
+                    count += rowCount;
+                    sy += y*rowCount;
+                }
+                // no texel passed the color test: keep the current position and color
+                if( count == 0 )
+                    break;
+
+                icount = 1.f/count;
+                int x1 = __float2int_rz(sx*icount);
+                int y1 = __float2int_rz(sy*icount);
+                s0 = __float2int_rz(s0*icount);
+                s1 = __float2int_rz(s1*icount);
+                s2 = __float2int_rz(s2*icount);
+
+                int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);
+                bool stopFlag = (x0 == x1 && y0 == y1) || (::abs(x1-x0) + ::abs(y1-y0) + norm2 <= eps);
+
+                x0 = x1; y0 = y1;
+                c.x = s0; c.y = s1; c.z = s2;
+
+                if( stopFlag )
+                    break;
+            }
+            // byte offset of this thread's pixel; kept as size_t so the pitch-based product is not truncated to int
+            const size_t base = (blockIdx.y * blockDim.y + threadIdx.y) * out_step + (blockIdx.x * blockDim.x + threadIdx.x) * 4 * sizeof(uchar);
+            *(uchar4*)(out + base) = c;
+            return make_short2((short)x0, (short)y0);
+        }
+
+        __global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
+        {
+            // One thread per pixel; threads that land outside the cols x rows area do nothing.
+            const int px = blockIdx.x * blockDim.x + threadIdx.x;
+            const int py = blockIdx.y * blockDim.y + threadIdx.y;
+            if( px >= cols || py >= rows ) return;
+            do_mean_shift(px, py, out, out_step, cols, rows, sp, sr, maxIter, eps);
+        }
+
+        void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
+        {
+            // 32x8 thread blocks, with the block count rounded up so every source pixel is covered.
+            const dim3 block(32, 8, 1);
+            const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y), 1);
+
+            // The kernel samples the source through tex_meanshift, so bind it before launching.
+            const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<uchar4>();
+            cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, texDesc, src.cols, src.rows, src.step ) );
+
+            meanshift_kernel<<< grid, block, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
+            cudaSafeCall( cudaGetLastError() );
+            // Wait for the device only when running on the default (0) stream.
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        // One thread per pixel: runs do_mean_shift for (x0, y0), which writes the filtered color into outr, and stores
+        // the returned final position as a short2 at the same pixel of outsp (row pitch outspstep bytes).
+        __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep, unsigned char* outsp, size_t outspstep,
+                                             int cols, int rows, int sp, int sr, int maxIter, float eps)
+        {
+            int x0 = blockIdx.x * blockDim.x + threadIdx.x;
+            int y0 = blockIdx.y * blockDim.y + threadIdx.y;
+            if( x0 < cols && y0 < rows )
+            {
+                // kept as size_t so the pitch-based byte offset is not truncated to int
+                const size_t basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
+                *(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
+            }
+        }
+
+        void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
+        {
+            // Launch geometry: 32x8 threads per block, block count rounded up to cover src.
+            const dim3 threadsPerBlock(32, 8, 1);
+            const dim3 blocks(divUp(src.cols, threadsPerBlock.x), divUp(src.rows, threadsPerBlock.y), 1);
+
+            // Expose src to the kernel through the tex_meanshift texture reference.
+            const cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar4>();
+            cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, channelDesc, src.cols, src.rows, src.step ) );
+
+            meanshiftproc_kernel<<< blocks, threadsPerBlock, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
+            cudaSafeCall( cudaGetLastError() );
+            // With the default (0) stream the call also waits for the device to finish.
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    }
+}}}
+
+#endif
diff --git a/modules/gpuimgproc/src/gftt.cpp b/modules/gpuimgproc/src/gftt.cpp
index 18a729bc1..cca1df444 100644
--- a/modules/gpuimgproc/src/gftt.cpp
+++ b/modules/gpuimgproc/src/gftt.cpp
@@ -62,6 +62,12 @@ namespace cv { namespace gpu { namespace cudev
 
 void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask)
 {
+#ifndef HAVE_OPENCV_GPUARITHM
+    (void) image;
+    (void) corners;
+    (void) mask;
+    throw_no_cuda();
+#else
     using namespace cv::gpu::cudev::gfft;
 
     CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
@@ -75,7 +81,7 @@ void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image,
         cornerMinEigenVal(image, eig_, Dx_, Dy_, buf_, blockSize, 3);
 
     double maxVal = 0;
-    minMax(eig_, 0, &maxVal, GpuMat(), minMaxbuf_);
+    gpu::minMax(eig_, 0, &maxVal, GpuMat(), minMaxbuf_);
 
     ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
 
@@ -164,6 +170,7 @@ void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image,
 
         corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
     }
+#endif
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/imgproc.cpp b/modules/gpuimgproc/src/histogram.cpp
similarity index 60%
rename from modules/gpuimgproc/src/imgproc.cpp
rename to modules/gpuimgproc/src/histogram.cpp
index 100d09186..3227dac6c 100644
--- a/modules/gpuimgproc/src/imgproc.cpp
+++ b/modules/gpuimgproc/src/histogram.cpp
@@ -47,113 +47,29 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
-void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_no_cuda(); }
+
 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*, Stream&) { throw_no_cuda(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat*, GpuMat&, int*, int*, int*, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_no_cuda(); }
 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::calcHist(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+
 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_no_cuda(); }
-void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int) { throw_no_cuda(); }
-void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_no_cuda(); }
-void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int) { throw_no_cuda(); }
-void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_no_cuda(); }
-void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_no_cuda(); }
-void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_no_cuda(); }
-void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_no_cuda(); }
-void cv::gpu::CannyBuf::create(const Size&, int) { throw_no_cuda(); }
-void cv::gpu::CannyBuf::release() { throw_no_cuda(); }
+
 cv::Ptr<cv::gpu::CLAHE> cv::gpu::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::gpu::CLAHE>(); }
-void cv::gpu::alphaComp(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
 ////////////////////////////////////////////////////////////////////////
-// meanShiftFiltering_GPU
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria, Stream& stream)
-{
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    if( src.empty() )
-        CV_Error( cv::Error::StsBadArg, "The input image is empty" );
-
-    if( src.depth() != CV_8U || src.channels() != 4 )
-        CV_Error( cv::Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
-
-    dst.create( src.size(), CV_8UC4 );
-
-    if( !(criteria.type & TermCriteria::MAX_ITER) )
-        criteria.maxCount = 5;
-
-    int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
-
-    float eps;
-    if( !(criteria.type & TermCriteria::EPS) )
-        eps = 1.f;
-    eps = (float)std::max(criteria.epsilon, 0.0);
-
-    meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
-// meanShiftProc_GPU
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
-{
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    if( src.empty() )
-        CV_Error( cv::Error::StsBadArg, "The input image is empty" );
-
-    if( src.depth() != CV_8U || src.channels() != 4 )
-        CV_Error( cv::Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
-
-    dstr.create( src.size(), CV_8UC4 );
-    dstsp.create( src.size(), CV_16SC2 );
-
-    if( !(criteria.type & TermCriteria::MAX_ITER) )
-        criteria.maxCount = 5;
-
-    int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
-
-    float eps;
-    if( !(criteria.type & TermCriteria::EPS) )
-        eps = 1.f;
-    eps = (float)std::max(criteria.epsilon, 0.0);
-
-    meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
-}
-
-
-////////////////////////////////////////////////////////////////////////
-// Histogram
+// NPP Histogram
 
 namespace
 {
@@ -444,10 +360,12 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4
     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
 }
 
+////////////////////////////////////////////////////////////////////////
+// calcHist
+
 namespace hist
 {
     void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
-    void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
 }
 
 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
@@ -460,6 +378,14 @@ void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
     hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
 }
 
+////////////////////////////////////////////////////////////////////////
+// equalizeHist
+
+namespace hist
+{
+    void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream); // forward declaration; called by cv::gpu::equalizeHist below
+}
+
 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream)
 {
     GpuMat hist;
@@ -492,229 +418,6 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat&
     hist::equalizeHist(src, dst, lut.ptr<int>(), stream);
 }
 
-////////////////////////////////////////////////////////////////////////
-// cornerHarris & minEgenVal
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void cornerHarris_gpu(int block_size, float k, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream);
-        void cornerMinEigenVal_gpu(int block_size, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream);
-    }
-}}}
-
-namespace
-{
-    void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
-    {
-        double scale = static_cast<double>(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
-
-        if (ksize < 0)
-            scale *= 2.;
-
-        if (src.depth() == CV_8U)
-            scale *= 255.;
-
-        scale = 1./scale;
-
-        Dx.create(src.size(), CV_32F);
-        Dy.create(src.size(), CV_32F);
-
-        if (ksize > 0)
-        {
-            Sobel(src, Dx, CV_32F, 1, 0, buf, ksize, scale, borderType, -1, stream);
-            Sobel(src, Dy, CV_32F, 0, 1, buf, ksize, scale, borderType, -1, stream);
-        }
-        else
-        {
-            Scharr(src, Dx, CV_32F, 1, 0, buf, scale, borderType, -1, stream);
-            Scharr(src, Dy, CV_32F, 0, 1, buf, scale, borderType, -1, stream);
-        }
-    }
-}
-
-void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)
-{
-    GpuMat Dx, Dy;
-    cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k, borderType);
-}
-
-void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType)
-{
-    GpuMat buf;
-    cornerHarris(src, dst, Dx, Dy, buf, blockSize, ksize, k, borderType);
-}
-
-void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream)
-{
-    using namespace cv::gpu::cudev::imgproc;
-
-    CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
-
-    extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
-
-    dst.create(src.size(), CV_32F);
-
-    cornerHarris_gpu(blockSize, static_cast<float>(k), Dx, Dy, dst, borderType, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
-{
-    GpuMat Dx, Dy;
-    cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize, borderType);
-}
-
-void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)
-{
-    GpuMat buf;
-    cornerMinEigenVal(src, dst, Dx, Dy, buf, blockSize, ksize, borderType);
-}
-
-void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
-{
-    using namespace ::cv::gpu::cudev::imgproc;
-
-    CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
-
-    extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
-
-    dst.create(src.size(), CV_32F);
-
-    cornerMinEigenVal_gpu(blockSize, Dx, Dy, dst, borderType, StreamAccessor::getStream(stream));
-}
-
-
-//////////////////////////////////////////////////////////////////////////////
-// Canny
-
-void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)
-{
-    if (apperture_size > 0)
-    {
-        ensureSizeIsEnough(image_size, CV_32SC1, dx);
-        ensureSizeIsEnough(image_size, CV_32SC1, dy);
-
-        if (apperture_size != 3)
-        {
-            filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
-            filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
-        }
-    }
-
-    ensureSizeIsEnough(image_size, CV_32FC1, mag);
-    ensureSizeIsEnough(image_size, CV_32SC1, map);
-
-    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st1);
-    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st2);
-}
-
-void cv::gpu::CannyBuf::release()
-{
-    dx.release();
-    dy.release();
-    mag.release();
-    map.release();
-    st1.release();
-    st2.release();
-}
-
-namespace canny
-{
-    void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
-    void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
-
-    void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh);
-
-    void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1);
-
-    void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2);
-
-    void getEdges(PtrStepSzi map, PtrStepSzb dst);
-}
-
-namespace
-{
-    void CannyCaller(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
-    {
-        using namespace canny;
-
-        buf.map.setTo(Scalar::all(0));
-        calcMap(dx, dy, buf.mag, buf.map, low_thresh, high_thresh);
-
-        edgesHysteresisLocal(buf.map, buf.st1.ptr<ushort2>());
-
-        edgesHysteresisGlobal(buf.map, buf.st1.ptr<ushort2>(), buf.st2.ptr<ushort2>());
-
-        getEdges(buf.map, dst);
-    }
-}
-
-void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
-{
-    CannyBuf buf;
-    Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
-}
-
-void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
-{
-    using namespace canny;
-
-    CV_Assert(src.type() == CV_8UC1);
-
-    if (!deviceSupports(SHARED_ATOMICS))
-        CV_Error(cv::Error::StsNotImplemented, "The device doesn't support shared atomics");
-
-    if( low_thresh > high_thresh )
-        std::swap( low_thresh, high_thresh);
-
-    dst.create(src.size(), CV_8U);
-    buf.create(src.size(), apperture_size);
-
-    if (apperture_size == 3)
-    {
-        Size wholeSize;
-        Point ofs;
-        src.locateROI(wholeSize, ofs);
-        GpuMat srcWhole(wholeSize, src.type(), src.datastart, src.step);
-
-        calcMagnitude(srcWhole, ofs.x, ofs.y, buf.dx, buf.dy, buf.mag, L2gradient);
-    }
-    else
-    {
-        buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));
-        buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));
-
-        calcMagnitude(buf.dx, buf.dy, buf.mag, L2gradient);
-    }
-
-    CannyCaller(buf.dx, buf.dy, buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
-}
-
-void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
-{
-    CannyBuf buf;
-    Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
-}
-
-void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
-{
-    using namespace canny;
-
-    CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
-    CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
-
-    if( low_thresh > high_thresh )
-        std::swap( low_thresh, high_thresh);
-
-    dst.create(dx.size(), CV_8U);
-    buf.create(dx.size(), -1);
-
-    calcMagnitude(dx, dy, buf.mag, L2gradient);
-
-    CannyCaller(dx, dy, buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
-}
-
 ////////////////////////////////////////////////////////////////////////
 // CLAHE
 
@@ -793,7 +496,11 @@ namespace
         }
         else
         {
+#ifndef HAVE_OPENCV_GPUARITHM
+            throw_no_cuda();
+#else
             cv::gpu::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar(), s);
+#endif
 
             tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
             srcForLut = srcExt_;
@@ -847,77 +554,4 @@ cv::Ptr<cv::gpu::CLAHE> cv::gpu::createCLAHE(double clipLimit, cv::Size tileGrid
     return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
 }
 
-////////////////////////////////////////////////////////////////////////
-// alphaComp
-
-namespace
-{
-    template <int DEPTH> struct NppAlphaCompFunc
-    {
-        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_t;
-
-        typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pSrc2, int nSrc2Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, NppiAlphaOp eAlphaOp);
-    };
-
-    template <int DEPTH, typename NppAlphaCompFunc<DEPTH>::func_t func> struct NppAlphaComp
-    {
-        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_t;
-
-        static void call(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream)
-        {
-            NppStreamHandler h(stream);
-
-            NppiSize oSizeROI;
-            oSizeROI.width = img1.cols;
-            oSizeROI.height = img2.rows;
-
-            nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
-                              dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    };
-}
-
-void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream)
-{
-    static const NppiAlphaOp npp_alpha_ops[] = {
-        NPPI_OP_ALPHA_OVER,
-        NPPI_OP_ALPHA_IN,
-        NPPI_OP_ALPHA_OUT,
-        NPPI_OP_ALPHA_ATOP,
-        NPPI_OP_ALPHA_XOR,
-        NPPI_OP_ALPHA_PLUS,
-        NPPI_OP_ALPHA_OVER_PREMUL,
-        NPPI_OP_ALPHA_IN_PREMUL,
-        NPPI_OP_ALPHA_OUT_PREMUL,
-        NPPI_OP_ALPHA_ATOP_PREMUL,
-        NPPI_OP_ALPHA_XOR_PREMUL,
-        NPPI_OP_ALPHA_PLUS_PREMUL,
-        NPPI_OP_ALPHA_PREMUL
-    };
-
-    typedef void (*func_t)(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream);
-
-    static const func_t funcs[] =
-    {
-        NppAlphaComp<CV_8U, nppiAlphaComp_8u_AC4R>::call,
-        0,
-        NppAlphaComp<CV_16U, nppiAlphaComp_16u_AC4R>::call,
-        0,
-        NppAlphaComp<CV_32S, nppiAlphaComp_32s_AC4R>::call,
-        NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call
-    };
-
-    CV_Assert( img1.type() == CV_8UC4 || img1.type() == CV_16UC4 || img1.type() == CV_32SC4 || img1.type() == CV_32FC4 );
-    CV_Assert( img1.size() == img2.size() && img1.type() == img2.type() );
-
-    dst.create(img1.size(), img1.type());
-
-    const func_t func = funcs[img1.depth()];
-
-    func(img1, img2, dst, npp_alpha_ops[alpha_op], StreamAccessor::getStream(stream));
-}
-
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/match_template.cpp b/modules/gpuimgproc/src/match_template.cpp
index d78828bf1..17d7b7686 100644
--- a/modules/gpuimgproc/src/match_template.cpp
+++ b/modules/gpuimgproc/src/match_template.cpp
@@ -45,7 +45,7 @@
 using namespace cv;
 using namespace cv::gpu;
 
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPUARITHM) || defined (CUDA_DISABLER)
 
 void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 
@@ -172,15 +172,15 @@ namespace
             return;
         }
 
-        ConvolveBuf convolve_buf;
+        gpu::ConvolveBuf convolve_buf;
         convolve_buf.user_block_size = buf.user_block_size;
 
         if (image.channels() == 1)
-            convolve(image.reshape(1), templ.reshape(1), result, true, convolve_buf, stream);
+            gpu::convolve(image.reshape(1), templ.reshape(1), result, true, convolve_buf, stream);
         else
         {
             GpuMat result_;
-            convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf, stream);
+            gpu::convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf, stream);
             extractFirstChannel_32F(result_, result, image.channels(), StreamAccessor::getStream(stream));
         }
     }
@@ -216,9 +216,9 @@ namespace
         matchTemplate_CCORR_8U(image, templ, result, buf, stream);
 
         buf.image_sqsums.resize(1);
-        sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
+        gpu::sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
 
-        unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
+        unsigned long long templ_sqsum = (unsigned long long)gpu::sqrSum(templ.reshape(1))[0];
         normalize_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
     }
 
@@ -243,9 +243,9 @@ namespace
         }
 
         buf.image_sqsums.resize(1);
-        sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
+        gpu::sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
 
-        unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
+        unsigned long long templ_sqsum = (unsigned long long)gpu::sqrSum(templ.reshape(1))[0];
 
         matchTemplate_CCORR_8U(image, templ, result, buf, stream);
         matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
@@ -256,9 +256,9 @@ namespace
             const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
     {
         buf.image_sqsums.resize(1);
-        sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
+        gpu::sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);
 
-        unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
+        unsigned long long templ_sqsum = (unsigned long long)gpu::sqrSum(templ.reshape(1))[0];
 
         matchTemplate_CCORR_8U(image, templ, result, buf, stream);
         matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
@@ -273,19 +273,19 @@ namespace
         if (image.channels() == 1)
         {
             buf.image_sums.resize(1);
-            integral(image, buf.image_sums[0], stream);
+            gpu::integral(image, buf.image_sums[0], stream);
 
             unsigned int templ_sum = (unsigned int)sum(templ)[0];
             matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, buf.image_sums[0], templ_sum, result, StreamAccessor::getStream(stream));
         }
         else
         {
-            split(image, buf.images);
+            gpu::split(image, buf.images);
             buf.image_sums.resize(buf.images.size());
             for (int i = 0; i < image.channels(); ++i)
-                integral(buf.images[i], buf.image_sums[i], stream);
+                gpu::integral(buf.images[i], buf.image_sums[i], stream);
 
-            Scalar templ_sum = sum(templ);
+            Scalar templ_sum = gpu::sum(templ);
 
             switch (image.channels())
             {
@@ -333,12 +333,12 @@ namespace
         if (image.channels() == 1)
         {
             buf.image_sums.resize(1);
-            integral(image, buf.image_sums[0], stream);
+            gpu::integral(image, buf.image_sums[0], stream);
             buf.image_sqsums.resize(1);
-            sqrIntegral(image, buf.image_sqsums[0], stream);
+            gpu::sqrIntegral(image, buf.image_sqsums[0], stream);
 
-            unsigned int templ_sum = (unsigned int)sum(templ)[0];
-            unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ)[0];
+            unsigned int templ_sum = (unsigned int)gpu::sum(templ)[0];
+            unsigned long long templ_sqsum = (unsigned long long)gpu::sqrSum(templ)[0];
 
             matchTemplatePrepared_CCOFF_NORMED_8U(
                     templ.cols, templ.rows, buf.image_sums[0], buf.image_sqsums[0],
@@ -346,17 +346,17 @@ namespace
         }
         else
         {
-            split(image, buf.images);
+            gpu::split(image, buf.images);
             buf.image_sums.resize(buf.images.size());
             buf.image_sqsums.resize(buf.images.size());
             for (int i = 0; i < image.channels(); ++i)
             {
-                integral(buf.images[i], buf.image_sums[i], stream);
-                sqrIntegral(buf.images[i], buf.image_sqsums[i], stream);
+                gpu::integral(buf.images[i], buf.image_sums[i], stream);
+                gpu::sqrIntegral(buf.images[i], buf.image_sqsums[i], stream);
             }
 
-            Scalar templ_sum = sum(templ);
-            Scalar templ_sqsum = sqrSum(templ);
+            Scalar templ_sum = gpu::sum(templ);
+            Scalar templ_sqsum = gpu::sqrSum(templ);
 
             switch (image.channels())
             {
diff --git a/modules/gpuimgproc/src/mean_shift.cpp b/modules/gpuimgproc/src/mean_shift.cpp
new file mode 100644
index 000000000..e30f95bf9
--- /dev/null
+++ b/modules/gpuimgproc/src/mean_shift.cpp
@@ -0,0 +1,128 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
+void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+////////////////////////////////////////////////////////////////////////
+// meanShiftFiltering_GPU
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
+    }
+}}}
+
+void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria, Stream& stream)
+{
+    using namespace ::cv::gpu::cudev::imgproc;
+
+    // Only a non-empty 8-bit, 4-channel source is accepted; dst is created as CV_8UC4 of the same size.
+    if( src.empty() )
+        CV_Error( cv::Error::StsBadArg, "The input image is empty" );
+    if( src.depth() != CV_8U || src.channels() != 4 )
+        CV_Error( cv::Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
+    dst.create( src.size(), CV_8UC4 );
+
+    // Without the MAX_ITER flag fall back to 5 iterations; the count is always clamped to [1, 100].
+    if( !(criteria.type & TermCriteria::MAX_ITER) )
+        criteria.maxCount = 5;
+    const int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
+
+    // Without the EPS flag fall back to eps = 1 instead of whatever criteria.epsilon
+    // happens to hold; a negative epsilon is raised to 0.
+    if( !(criteria.type & TermCriteria::EPS) )
+        criteria.epsilon = 1.0;
+    const float eps = (float)std::max(criteria.epsilon, 0.0);
+
+    meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
+}
+
+////////////////////////////////////////////////////////////////////////
+// meanShiftProc_GPU
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
+    }
+}}}
+
+void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
+{
+    using namespace ::cv::gpu::cudev::imgproc;
+
+    // Only a non-empty 8-bit, 4-channel source is accepted; dstr is created as CV_8UC4 and dstsp as CV_16SC2.
+    if( src.empty() )
+        CV_Error( cv::Error::StsBadArg, "The input image is empty" );
+    if( src.depth() != CV_8U || src.channels() != 4 )
+        CV_Error( cv::Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
+    dstr.create( src.size(), CV_8UC4 );
+    dstsp.create( src.size(), CV_16SC2 );
+
+    // Without the MAX_ITER flag fall back to 5 iterations; the count is always clamped to [1, 100].
+    if( !(criteria.type & TermCriteria::MAX_ITER) )
+        criteria.maxCount = 5;
+    const int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
+
+    // Without the EPS flag fall back to eps = 1 instead of whatever criteria.epsilon
+    // happens to hold; a negative epsilon is raised to 0.
+    if( !(criteria.type & TermCriteria::EPS) )
+        criteria.epsilon = 1.0;
+    const float eps = (float)std::max(criteria.epsilon, 0.0);
+
+    meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
+}
+
+#endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpuimgproc/src/precomp.hpp b/modules/gpuimgproc/src/precomp.hpp
index 93bcf3a28..00cca1701 100644
--- a/modules/gpuimgproc/src/precomp.hpp
+++ b/modules/gpuimgproc/src/precomp.hpp
@@ -45,9 +45,14 @@
 
 #include "opencv2/gpuimgproc.hpp"
 #include "opencv2/gpufilters.hpp"
-#include "opencv2/gpuarithm.hpp"
 
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/gpu_private.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUARITHM
+#  include "opencv2/gpuarithm.hpp"
+#endif
+
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpuimgproc/test/test_bilateral_filter.cpp b/modules/gpuimgproc/test/test_bilateral_filter.cpp
new file mode 100644
index 000000000..23dd3b85d
--- /dev/null
+++ b/modules/gpuimgproc/test/test_bilateral_filter.cpp
@@ -0,0 +1,97 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+////////////////////////////////////////////////////////
+// BilateralFilter
+
+PARAM_TEST_CASE(BilateralFilter, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    int kernel_size;
+    float sigma_color;
+    float sigma_spatial;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        // Filter settings are fixed; only device, size and type come from the test parameters.
+        kernel_size = 5;
+        sigma_color = 10.f;
+        sigma_spatial = 3.5f;
+        // Hand the parameterized device's id to cv::gpu::setDevice before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(BilateralFilter, Accuracy)
+{
+    cv::Mat src = randomMat(size, type);
+
+    src.convertTo(src, type);
+    cv::gpu::GpuMat dst;
+    // GPU result under test.
+    cv::gpu::bilateralFilter(loadMat(src), dst, kernel_size, sigma_color, sigma_spatial);
+    // CPU reference computed with the same kernel size and sigmas.
+    cv::Mat dst_gold;
+    cv::bilateralFilter(src, dst_gold, kernel_size, sigma_color, sigma_spatial);
+    // CV_32F sources are compared with tolerance 1e-3, all other depths with 1.0.
+    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, BilateralFilter, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(639, 481)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_32FC1), MatType(CV_32FC3))
+    ));
+
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_blend.cpp b/modules/gpuimgproc/test/test_blend.cpp
new file mode 100644
index 000000000..87359b500
--- /dev/null
+++ b/modules/gpuimgproc/test/test_blend.cpp
@@ -0,0 +1,124 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+////////////////////////////////////////////////////////////////////////////
+// Blend
+
+namespace
+{
+    // CPU reference for linear blending; weights are read as one float per pixel and shared by all channels.
+    template <typename T>
+    void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold)
+    {
+        result_gold.create(img1.size(), img1.type());
+        const int channels = img1.channels();
+        const int rowLength = img1.cols * channels;
+
+        for (int row = 0; row < img1.rows; ++row)
+        {
+            const T* a = img1.ptr<T>(row);
+            const T* b = img2.ptr<T>(row);
+            const float* wa = weights1.ptr<float>(row);
+            const float* wb = weights2.ptr<float>(row);
+            T* out = result_gold.ptr<T>(row);
+
+            for (int i = 0; i < rowLength; ++i)
+            {
+                const float w1 = wa[i / channels], w2 = wb[i / channels];
+                out[i] = static_cast<T>((a[i] * w1 + b[i] * w2) / (w1 + w2 + 1e-5f));
+            }
+        }
+    }
+}
+
+PARAM_TEST_CASE(Blend, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    int type;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        type = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // Hand the parameterized device's id to cv::gpu::setDevice before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Blend, Accuracy)
+{
+    const bool is8U = CV_MAT_DEPTH(type) == CV_8U;
+    const double maxVal = is8U ? 255.0 : 1.0;  // last argument handed to randomMat for both images
+
+    cv::Mat img1 = randomMat(size, type, 0.0, maxVal);
+    cv::Mat img2 = randomMat(size, type, 0.0, maxVal);
+    cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
+    cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
+
+    cv::gpu::GpuMat result;
+    cv::gpu::blendLinear(loadMat(img1, useRoi), loadMat(img2, useRoi), loadMat(weights1, useRoi), loadMat(weights2, useRoi), result);
+
+    cv::Mat result_gold;  // CPU reference, instantiated for uchar when the depth is CV_8U and float otherwise
+    if (!is8U)
+        blendLinearGold<float>(img1, img2, weights1, weights2, result_gold);
+    else
+        blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold);
+    EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.0 : 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    WHOLE_SUBMAT));
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_canny.cpp b/modules/gpuimgproc/test/test_canny.cpp
new file mode 100644
index 000000000..b3ab5addc
--- /dev/null
+++ b/modules/gpuimgproc/test/test_canny.cpp
@@ -0,0 +1,114 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+////////////////////////////////////////////////////////
+// Canny
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(AppertureSize, int)
+    IMPLEMENT_PARAM_CLASS(L2gradient, bool)
+}
+
+PARAM_TEST_CASE(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int apperture_size;
+    bool useL2gradient;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        apperture_size = GET_PARAM(1);
+        useL2gradient = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+        // Hand the parameterized device's id to cv::gpu::setDevice before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(Canny, Accuracy)
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
+
+    const double lowThresh = 50.0;
+    const double highThresh = 100.0;
+
+    if (supportFeature(devInfo, cv::gpu::SHARED_ATOMICS))
+    {
+        // Shared atomics available: compare the GPU edge map with the CPU one.
+        cv::gpu::GpuMat edges;
+        cv::gpu::Canny(loadMat(img, useRoi), edges, lowThresh, highThresh, apperture_size, useL2gradient);
+        cv::Mat edges_gold;
+        cv::Canny(img, edges_gold, lowThresh, highThresh, apperture_size, useL2gradient);
+        EXPECT_MAT_SIMILAR(edges_gold, edges, 2e-2);
+    }
+    else
+    {
+        try
+        {
+            cv::gpu::GpuMat edges;
+            cv::gpu::Canny(loadMat(img), edges, lowThresh, highThresh, apperture_size, useL2gradient);
+        }
+        catch (const cv::Exception& e)
+        {
+            // An exception raised without shared atomics must carry StsNotImplemented.
+            ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(AppertureSize(3), AppertureSize(5)),
+    testing::Values(L2gradient(false), L2gradient(true)),
+    WHOLE_SUBMAT));
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_corners.cpp b/modules/gpuimgproc/test/test_corners.cpp
new file mode 100644
index 000000000..54d8df457
--- /dev/null
+++ b/modules/gpuimgproc/test/test_corners.cpp
@@ -0,0 +1,145 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// CornerHarris
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(BlockSize, int);
+    IMPLEMENT_PARAM_CLASS(ApertureSize, int);
+}
+
+PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderType, BlockSize, ApertureSize)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+    int borderType;
+    int blockSize;
+    int apertureSize;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        borderType = GET_PARAM(2);
+        blockSize = GET_PARAM(3);
+        apertureSize = GET_PARAM(4);
+        // Hand the parameterized device's id to cv::gpu::setDevice before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(CornerHarris, Accuracy)
+{
+    cv::Mat src = readImageType("stereobm/aloe-L.png", type);
+    ASSERT_FALSE(src.empty());
+    // The same k value is handed to both the GPU and the CPU implementation below.
+    double k = randomDouble(0.1, 0.9);
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::cornerHarris(loadMat(src), dst, blockSize, apertureSize, k, borderType);
+    // CPU reference computed with identical arguments.
+    cv::Mat dst_gold;
+    cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.02);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerHarris, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
+    testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)),
+    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
+    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// cornerMinEigen
+
+PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, BorderType, BlockSize, ApertureSize)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+    int borderType;
+    int blockSize;
+    int apertureSize;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        borderType = GET_PARAM(2);
+        blockSize = GET_PARAM(3);
+        apertureSize = GET_PARAM(4);
+        // Hand the parameterized device's id to cv::gpu::setDevice before the test body runs.
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(CornerMinEigen, Accuracy)
+{
+    cv::Mat src = readImageType("stereobm/aloe-L.png", type);
+    ASSERT_FALSE(src.empty());
+    // GPU result under test.
+    cv::gpu::GpuMat dst;
+    cv::gpu::cornerMinEigenVal(loadMat(src), dst, blockSize, apertureSize, borderType);
+    // CPU reference computed with the same block size, aperture size and border type.
+    cv::Mat dst_gold;
+    cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.02);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerMinEigen, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
+    testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)),
+    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
+    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_gftt.cpp b/modules/gpuimgproc/test/test_gftt.cpp
new file mode 100644
index 000000000..b20df33ae
--- /dev/null
+++ b/modules/gpuimgproc/test/test_gftt.cpp
@@ -0,0 +1,131 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+//////////////////////////////////////////////////////
+// GoodFeaturesToTrack
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(MinDistance, double) // file-local MinDistance parameter type; presumably wraps a double (macro defined elsewhere)
+}
+
+PARAM_TEST_CASE(GoodFeaturesToTrack, cv::gpu::DeviceInfo, MinDistance) // fixture: device x minDistance
+{
+    cv::gpu::DeviceInfo devInfo;
+    double minDistance;
+    // Unpacks the parameter tuple and passes the device ID to cv::gpu::setDevice.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        minDistance = GET_PARAM(1);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(GoodFeaturesToTrack, Accuracy)
+{
+    cv::Mat image = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    int maxCorners = 1000;
+    double qualityLevel = 0.01;
+
+    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(maxCorners, qualityLevel, minDistance);
+
+    cv::gpu::GpuMat d_pts;
+    detector(loadMat(image), d_pts);
+
+    ASSERT_FALSE(d_pts.empty());
+
+    // pts_mat is a header over the vector's storage, so the download is expected to land in pts.
+    std::vector<cv::Point2f> pts(d_pts.cols);
+    cv::Mat pts_mat(1, d_pts.cols, CV_32FC2, (void*) &pts[0]);
+    d_pts.download(pts_mat);
+    // CPU reference computed with the same maxCorners / qualityLevel / minDistance.
+    std::vector<cv::Point2f> pts_gold;
+    cv::goodFeaturesToTrack(image, pts_gold, maxCorners, qualityLevel, minDistance);
+
+    ASSERT_EQ(pts_gold.size(), pts.size());
+
+    // Count corners whose integer-converted coordinates differ from the reference.
+    size_t mismatch = 0;
+    for (size_t i = 0; i < pts.size(); ++i)
+    {
+        cv::Point2i a = pts_gold[i];
+        cv::Point2i b = pts[i];
+
+        if (a.x != b.x || a.y != b.y)
+            ++mismatch;
+    }
+
+    double bad_ratio = static_cast<double>(mismatch) / pts.size();
+    // At most 1% of the corners may disagree with the reference.
+    ASSERT_LE(bad_ratio, 0.01);
+}
+
+GPU_TEST_P(GoodFeaturesToTrack, EmptyCorners)
+{
+    const int cornerLimit = 1000;
+    const double quality = 0.01;
+
+    // An all-zero 100x100 image; the output buffer starts out allocated (1 x cornerLimit).
+    cv::gpu::GpuMat blank(100, 100, CV_8UC1, cv::Scalar::all(0));
+    cv::gpu::GpuMat corners(1, cornerLimit, CV_32FC2);
+
+    cv::gpu::GoodFeaturesToTrackDetector_GPU gftt(cornerLimit, quality, minDistance);
+    gftt(blank, corners);
+    // The detector is expected to leave the output empty for this input.
+    ASSERT_TRUE(corners.empty());
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, GoodFeaturesToTrack, testing::Combine( // ALL_DEVICES combined with the minDistance values
+    ALL_DEVICES,
+    testing::Values(MinDistance(0.0), MinDistance(3.0))));
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_histogram.cpp b/modules/gpuimgproc/test/test_histogram.cpp
new file mode 100644
index 000000000..c3d17d2a1
--- /dev/null
+++ b/modules/gpuimgproc/test/test_histogram.cpp
@@ -0,0 +1,227 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// HistEven
+
+struct HistEven : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture parameterized on the device only
+{
+    cv::gpu::DeviceInfo devInfo;
+    // Stores the device parameter and passes its ID to cv::gpu::setDevice.
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(HistEven, Accuracy)
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty());
+
+    // Work on the HSV conversion; only channel 0 is histogrammed below.
+    cv::Mat hsv;
+    cv::cvtColor(img, hsv, cv::COLOR_BGR2HSV);
+
+    const int hbins = 30;
+    float hranges[] = {0.0f, 180.0f};
+
+    // CPU reference: calcHist over channel 0, transposed and converted to CV_32S.
+    const int channels[] = {0};
+    const int histSize[] = {hbins};
+    const float* ranges[] = {hranges};
+    cv::MatND histnd;
+    cv::calcHist(&hsv, 1, channels, cv::Mat(), histnd, 1, histSize, ranges);
+    cv::Mat hist_gold = cv::Mat(histnd).t();
+    hist_gold.convertTo(hist_gold, CV_32S);
+
+    // GPU result: histEven on the first plane produced by cv::gpu::split.
+    std::vector<cv::gpu::GpuMat> srcs;
+    cv::gpu::split(loadMat(hsv), srcs);
+    cv::gpu::GpuMat hist;
+    cv::gpu::histEven(srcs[0], hist, hbins, (int)hranges[0], (int)hranges[1]);
+
+    EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HistEven, ALL_DEVICES); // instantiated over the ALL_DEVICES generator
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// CalcHist
+
+namespace
+{
+    // CPU reference: counts how often each byte value occurs in src into a 1x256 CV_32SC1 row.
+    void calcHistGold(const cv::Mat& src, cv::Mat& hist)
+    {
+        hist.create(1, 256, CV_32SC1);
+        hist.setTo(cv::Scalar::all(0));
+        int* const bins = hist.ptr<int>();
+
+        for (int row = 0; row < src.rows; ++row)
+        {
+            const uchar* const pixels = src.ptr(row);
+            for (int col = 0; col < src.cols; ++col)
+                bins[pixels[col]] += 1;
+        }
+    }
+}
+
+PARAM_TEST_CASE(CalcHist, cv::gpu::DeviceInfo, cv::Size) // fixture: device x image size
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Size size;
+    // Unpacks the parameter tuple and passes the device ID to cv::gpu::setDevice.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(CalcHist, Accuracy)
+{
+    // Input produced by randomMat: CV_8UC1 of the parameterized size.
+    const cv::Mat src = randomMat(size, CV_8UC1);
+
+    cv::Mat hist_gold;
+    calcHistGold(src, hist_gold);
+    cv::gpu::GpuMat hist;
+    cv::gpu::calcHist(loadMat(src), hist);
+    // Compared against the CPU reference with zero tolerance.
+    EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CalcHist, testing::Combine( // ALL_DEVICES combined with DIFFERENT_SIZES
+    ALL_DEVICES,
+    DIFFERENT_SIZES));
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// EqualizeHist
+
+PARAM_TEST_CASE(EqualizeHist, cv::gpu::DeviceInfo, cv::Size) // fixture: device x image size
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    // Unpacks the parameter tuple and passes the device ID to cv::gpu::setDevice.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(EqualizeHist, Accuracy)
+{
+    // Input produced by randomMat: CV_8UC1 of the parameterized size.
+    const cv::Mat src = randomMat(size, CV_8UC1);
+
+    cv::Mat dst_gold;
+    cv::equalizeHist(src, dst_gold);
+    cv::gpu::GpuMat dst;
+    cv::gpu::equalizeHist(loadMat(src), dst);
+    // Compared against the CPU result with tolerance 3.0.
+    EXPECT_MAT_NEAR(dst_gold, dst, 3.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, EqualizeHist, testing::Combine( // ALL_DEVICES combined with DIFFERENT_SIZES
+    ALL_DEVICES,
+    DIFFERENT_SIZES));
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// CLAHE
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(ClipLimit, double) // file-local ClipLimit parameter type; presumably wraps a double (macro defined elsewhere)
+}
+
+PARAM_TEST_CASE(CLAHE, cv::gpu::DeviceInfo, cv::Size, ClipLimit) // fixture: device x image size x clip limit
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    double clipLimit;
+    // Unpacks the parameter tuple and passes the device ID to cv::gpu::setDevice.
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        clipLimit = GET_PARAM(2);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(CLAHE, Accuracy)
+{
+    // Input produced by randomMat: CV_8UC1 of the parameterized size.
+    const cv::Mat src = randomMat(size, CV_8UC1);
+
+    // CPU reference built with the same clip limit as the GPU object.
+    cv::Mat dst_gold;
+    cv::createCLAHE(clipLimit)->apply(src, dst_gold);
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::createCLAHE(clipLimit)->apply(loadMat(src), dst);
+
+    ASSERT_MAT_NEAR(dst_gold, dst, 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CLAHE, testing::Combine( // ALL_DEVICES x DIFFERENT_SIZES x clip limits
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(ClipLimit(0.0), ClipLimit(40.0)))); // wrapped in ClipLimit like the other typed parameters in these tests
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_imgproc.cpp b/modules/gpuimgproc/test/test_imgproc.cpp
deleted file mode 100644
index 0fa1d0ffa..000000000
--- a/modules/gpuimgproc/test/test_imgproc.cpp
+++ /dev/null
@@ -1,890 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#ifdef HAVE_CUDA
-
-using namespace cvtest;
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// HistEven
-
-struct HistEven : testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(HistEven, Accuracy)
-{
-    cv::Mat img = readImage("stereobm/aloe-L.png");
-    ASSERT_FALSE(img.empty());
-
-    cv::Mat hsv;
-    cv::cvtColor(img, hsv, cv::COLOR_BGR2HSV);
-
-    int hbins = 30;
-    float hranges[] = {0.0f, 180.0f};
-
-    std::vector<cv::gpu::GpuMat> srcs;
-    cv::gpu::split(loadMat(hsv), srcs);
-
-    cv::gpu::GpuMat hist;
-    cv::gpu::histEven(srcs[0], hist, hbins, (int)hranges[0], (int)hranges[1]);
-
-    cv::MatND histnd;
-    int histSize[] = {hbins};
-    const float* ranges[] = {hranges};
-    int channels[] = {0};
-    cv::calcHist(&hsv, 1, channels, cv::Mat(), histnd, 1, histSize, ranges);
-
-    cv::Mat hist_gold = histnd;
-    hist_gold = hist_gold.t();
-    hist_gold.convertTo(hist_gold, CV_32S);
-
-    EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HistEven, ALL_DEVICES);
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// CalcHist
-
-namespace
-{
-    void calcHistGold(const cv::Mat& src, cv::Mat& hist)
-    {
-        hist.create(1, 256, CV_32SC1);
-        hist.setTo(cv::Scalar::all(0));
-
-        int* hist_row = hist.ptr<int>();
-        for (int y = 0; y < src.rows; ++y)
-        {
-            const uchar* src_row = src.ptr(y);
-
-            for (int x = 0; x < src.cols; ++x)
-                ++hist_row[src_row[x]];
-        }
-    }
-}
-
-PARAM_TEST_CASE(CalcHist, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    cv::Size size;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(CalcHist, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_8UC1);
-
-    cv::gpu::GpuMat hist;
-    cv::gpu::calcHist(loadMat(src), hist);
-
-    cv::Mat hist_gold;
-    calcHistGold(src, hist_gold);
-
-    EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CalcHist, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES));
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// EqualizeHist
-
-PARAM_TEST_CASE(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(EqualizeHist, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_8UC1);
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::equalizeHist(loadMat(src), dst);
-
-    cv::Mat dst_gold;
-    cv::equalizeHist(src, dst_gold);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 3.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, EqualizeHist, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES));
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// CLAHE
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(ClipLimit, double)
-}
-
-PARAM_TEST_CASE(CLAHE, cv::gpu::DeviceInfo, cv::Size, ClipLimit)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    double clipLimit;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        clipLimit = GET_PARAM(2);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(CLAHE, Accuracy)
-{
-    cv::Mat src = randomMat(size, CV_8UC1);
-
-    cv::Ptr<cv::gpu::CLAHE> clahe = cv::gpu::createCLAHE(clipLimit);
-    cv::gpu::GpuMat dst;
-    clahe->apply(loadMat(src), dst);
-
-    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit);
-    cv::Mat dst_gold;
-    clahe_gold->apply(src, dst_gold);
-
-    ASSERT_MAT_NEAR(dst_gold, dst, 1.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CLAHE, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(0.0, 40.0)));
-
-////////////////////////////////////////////////////////
-// Canny
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(AppertureSize, int);
-    IMPLEMENT_PARAM_CLASS(L2gradient, bool);
-}
-
-PARAM_TEST_CASE(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    int apperture_size;
-    bool useL2gradient;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        apperture_size = GET_PARAM(1);
-        useL2gradient = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Canny, Accuracy)
-{
-    cv::Mat img = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    double low_thresh = 50.0;
-    double high_thresh = 100.0;
-
-    if (!supportFeature(devInfo, cv::gpu::SHARED_ATOMICS))
-    {
-        try
-        {
-        cv::gpu::GpuMat edges;
-        cv::gpu::Canny(loadMat(img), edges, low_thresh, high_thresh, apperture_size, useL2gradient);
-        }
-        catch (const cv::Exception& e)
-        {
-            ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
-        }
-    }
-    else
-    {
-        cv::gpu::GpuMat edges;
-        cv::gpu::Canny(loadMat(img, useRoi), edges, low_thresh, high_thresh, apperture_size, useL2gradient);
-
-        cv::Mat edges_gold;
-        cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
-
-        EXPECT_MAT_SIMILAR(edges_gold, edges, 2e-2);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(AppertureSize(3), AppertureSize(5)),
-    testing::Values(L2gradient(false), L2gradient(true)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// MeanShift
-
-struct MeanShift : testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    cv::Mat img;
-
-    int spatialRad;
-    int colorRad;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-
-        cv::gpu::setDevice(devInfo.deviceID());
-
-        img = readImageType("meanshift/cones.png", CV_8UC4);
-        ASSERT_FALSE(img.empty());
-
-        spatialRad = 30;
-        colorRad = 30;
-    }
-};
-
-GPU_TEST_P(MeanShift, Filtering)
-{
-    cv::Mat img_template;
-    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
-        img_template = readImage("meanshift/con_result.png");
-    else
-        img_template = readImage("meanshift/con_result_CC1X.png");
-    ASSERT_FALSE(img_template.empty());
-
-    cv::gpu::GpuMat d_dst;
-    cv::gpu::meanShiftFiltering(loadMat(img), d_dst, spatialRad, colorRad);
-
-    ASSERT_EQ(CV_8UC4, d_dst.type());
-
-    cv::Mat dst(d_dst);
-
-    cv::Mat result;
-    cv::cvtColor(dst, result, cv::COLOR_BGRA2BGR);
-
-    EXPECT_MAT_NEAR(img_template, result, 0.0);
-}
-
-GPU_TEST_P(MeanShift, Proc)
-{
-    cv::FileStorage fs;
-    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
-        fs.open(std::string(cvtest::TS::ptr()->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);
-    else
-        fs.open(std::string(cvtest::TS::ptr()->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);
-    ASSERT_TRUE(fs.isOpened());
-
-    cv::Mat spmap_template;
-    fs["spmap"] >> spmap_template;
-    ASSERT_FALSE(spmap_template.empty());
-
-    cv::gpu::GpuMat rmap_filtered;
-    cv::gpu::meanShiftFiltering(loadMat(img), rmap_filtered, spatialRad, colorRad);
-
-    cv::gpu::GpuMat rmap;
-    cv::gpu::GpuMat spmap;
-    cv::gpu::meanShiftProc(loadMat(img), rmap, spmap, spatialRad, colorRad);
-
-    ASSERT_EQ(CV_8UC4, rmap.type());
-
-    EXPECT_MAT_NEAR(rmap_filtered, rmap, 0.0);
-    EXPECT_MAT_NEAR(spmap_template, spmap, 0.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MeanShift, ALL_DEVICES);
-
-////////////////////////////////////////////////////////////////////////////////
-// MeanShiftSegmentation
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(MinSize, int);
-}
-
-PARAM_TEST_CASE(MeanShiftSegmentation, cv::gpu::DeviceInfo, MinSize)
-{
-    cv::gpu::DeviceInfo devInfo;
-    int minsize;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        minsize = GET_PARAM(1);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MeanShiftSegmentation, Regression)
-{
-    cv::Mat img = readImageType("meanshift/cones.png", CV_8UC4);
-    ASSERT_FALSE(img.empty());
-
-    std::ostringstream path;
-    path << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize;
-    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
-        path << ".png";
-    else
-        path << "_CC1X.png";
-    cv::Mat dst_gold = readImage(path.str());
-    ASSERT_FALSE(dst_gold.empty());
-
-    cv::Mat dst;
-    cv::gpu::meanShiftSegmentation(loadMat(img), dst, 10, 10, minsize);
-
-    cv::Mat dst_rgb;
-    cv::cvtColor(dst, dst_rgb, cv::COLOR_BGRA2BGR);
-
-    EXPECT_MAT_SIMILAR(dst_gold, dst_rgb, 1e-3);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MeanShiftSegmentation, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(MinSize(0), MinSize(4), MinSize(20), MinSize(84), MinSize(340), MinSize(1364))));
-
-////////////////////////////////////////////////////////////////////////////
-// Blend
-
-namespace
-{
-    template <typename T>
-    void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold)
-    {
-        result_gold.create(img1.size(), img1.type());
-
-        int cn = img1.channels();
-
-        for (int y = 0; y < img1.rows; ++y)
-        {
-            const float* weights1_row = weights1.ptr<float>(y);
-            const float* weights2_row = weights2.ptr<float>(y);
-            const T* img1_row = img1.ptr<T>(y);
-            const T* img2_row = img2.ptr<T>(y);
-            T* result_gold_row = result_gold.ptr<T>(y);
-
-            for (int x = 0; x < img1.cols * cn; ++x)
-            {
-                float w1 = weights1_row[x / cn];
-                float w2 = weights2_row[x / cn];
-                result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f));
-            }
-        }
-    }
-}
-
-PARAM_TEST_CASE(Blend, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-        useRoi = GET_PARAM(3);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(Blend, Accuracy)
-{
-    int depth = CV_MAT_DEPTH(type);
-
-    cv::Mat img1 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0);
-    cv::Mat img2 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0);
-    cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
-    cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
-
-    cv::gpu::GpuMat result;
-    cv::gpu::blendLinear(loadMat(img1, useRoi), loadMat(img2, useRoi), loadMat(weights1, useRoi), loadMat(weights2, useRoi), result);
-
-    cv::Mat result_gold;
-    if (depth == CV_8U)
-        blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold);
-    else
-        blendLinearGold<float>(img1, img2, weights1, weights2, result_gold);
-
-    EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.0 : 1e-5);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
-    WHOLE_SUBMAT));
-
-////////////////////////////////////////////////////////////////////////////////
-// MatchTemplate8U
-
-CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
-}
-
-PARAM_TEST_CASE(MatchTemplate8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    cv::Size templ_size;
-    int cn;
-    int method;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        templ_size = GET_PARAM(2);
-        cn = GET_PARAM(3);
-        method = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MatchTemplate8U, Accuracy)
-{
-    cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
-    cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::matchTemplate(loadMat(image), loadMat(templ), dst, method);
-
-    cv::Mat dst_gold;
-    cv::matchTemplate(image, templ, dst_gold, method);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, templ_size.area() * 1e-1);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
-    testing::Values(Channels(1), Channels(3), Channels(4)),
-    TemplateMethod::all()));
-
-////////////////////////////////////////////////////////////////////////////////
-// MatchTemplate32F
-
-PARAM_TEST_CASE(MatchTemplate32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    cv::Size templ_size;
-    int cn;
-    int method;
-
-    int n, m, h, w;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        templ_size = GET_PARAM(2);
-        cn = GET_PARAM(3);
-        method = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MatchTemplate32F, Regression)
-{
-    cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
-    cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::matchTemplate(loadMat(image), loadMat(templ), dst, method);
-
-    cv::Mat dst_gold;
-    cv::matchTemplate(image, templ, dst_gold, method);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, templ_size.area() * 1e-1);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
-    testing::Values(Channels(1), Channels(3), Channels(4)),
-    testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
-
-////////////////////////////////////////////////////////////////////////////////
-// MatchTemplateBlackSource
-
-PARAM_TEST_CASE(MatchTemplateBlackSource, cv::gpu::DeviceInfo, TemplateMethod)
-{
-    cv::gpu::DeviceInfo devInfo;
-    int method;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        method = GET_PARAM(1);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MatchTemplateBlackSource, Accuracy)
-{
-    cv::Mat image = readImage("matchtemplate/black.png");
-    ASSERT_FALSE(image.empty());
-
-    cv::Mat pattern = readImage("matchtemplate/cat.png");
-    ASSERT_FALSE(pattern.empty());
-
-    cv::gpu::GpuMat d_dst;
-    cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), d_dst, method);
-
-    cv::Mat dst(d_dst);
-
-    double maxValue;
-    cv::Point maxLoc;
-    cv::minMaxLoc(dst, NULL, &maxValue, NULL, &maxLoc);
-
-    cv::Point maxLocGold = cv::Point(284, 12);
-
-    ASSERT_EQ(maxLocGold, maxLoc);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplateBlackSource, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(TemplateMethod(cv::TM_CCOEFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED))));
-
-////////////////////////////////////////////////////////////////////////////////
-// MatchTemplate_CCOEF_NORMED
-
-PARAM_TEST_CASE(MatchTemplate_CCOEF_NORMED, cv::gpu::DeviceInfo, std::pair<std::string, std::string>)
-{
-    cv::gpu::DeviceInfo devInfo;
-    std::string imageName;
-    std::string patternName;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        imageName = GET_PARAM(1).first;
-        patternName = GET_PARAM(1).second;
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
-{
-    cv::Mat image = readImage(imageName);
-    ASSERT_FALSE(image.empty());
-
-    cv::Mat pattern = readImage(patternName);
-    ASSERT_FALSE(pattern.empty());
-
-    cv::gpu::GpuMat d_dst;
-    cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), d_dst, cv::TM_CCOEFF_NORMED);
-
-    cv::Mat dst(d_dst);
-
-    cv::Point minLoc, maxLoc;
-    double minVal, maxVal;
-    cv::minMaxLoc(dst, &minVal, &maxVal, &minLoc, &maxLoc);
-
-    cv::Mat dstGold;
-    cv::matchTemplate(image, pattern, dstGold, cv::TM_CCOEFF_NORMED);
-
-    double minValGold, maxValGold;
-    cv::Point minLocGold, maxLocGold;
-    cv::minMaxLoc(dstGold, &minValGold, &maxValGold, &minLocGold, &maxLocGold);
-
-    ASSERT_EQ(minLocGold, minLoc);
-    ASSERT_EQ(maxLocGold, maxLoc);
-    ASSERT_LE(maxVal, 1.0);
-    ASSERT_GE(minVal, -1.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")))));
-
-////////////////////////////////////////////////////////////////////////////////
-// MatchTemplate_CanFindBigTemplate
-
-struct MatchTemplate_CanFindBigTemplate : testing::TestWithParam<cv::gpu::DeviceInfo>
-{
-    cv::gpu::DeviceInfo devInfo;
-
-    virtual void SetUp()
-    {
-        devInfo = GetParam();
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF_NORMED)
-{
-    cv::Mat scene = readImage("matchtemplate/scene.png");
-    ASSERT_FALSE(scene.empty());
-
-    cv::Mat templ = readImage("matchtemplate/template.png");
-    ASSERT_FALSE(templ.empty());
-
-    cv::gpu::GpuMat d_result;
-    cv::gpu::matchTemplate(loadMat(scene), loadMat(templ), d_result, cv::TM_SQDIFF_NORMED);
-
-    cv::Mat result(d_result);
-
-    double minVal;
-    cv::Point minLoc;
-    cv::minMaxLoc(result, &minVal, 0, &minLoc, 0);
-
-    ASSERT_GE(minVal, 0);
-    ASSERT_LT(minVal, 1e-3);
-    ASSERT_EQ(344, minLoc.x);
-    ASSERT_EQ(0, minLoc.y);
-}
-
-GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF)
-{
-    cv::Mat scene = readImage("matchtemplate/scene.png");
-    ASSERT_FALSE(scene.empty());
-
-    cv::Mat templ = readImage("matchtemplate/template.png");
-    ASSERT_FALSE(templ.empty());
-
-    cv::gpu::GpuMat d_result;
-    cv::gpu::matchTemplate(loadMat(scene), loadMat(templ), d_result, cv::TM_SQDIFF);
-
-    cv::Mat result(d_result);
-
-    double minVal;
-    cv::Point minLoc;
-    cv::minMaxLoc(result, &minVal, 0, &minLoc, 0);
-
-    ASSERT_GE(minVal, 0);
-    ASSERT_EQ(344, minLoc.x);
-    ASSERT_EQ(0, minLoc.y);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate_CanFindBigTemplate, ALL_DEVICES);
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// CornerHarris
-
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(BlockSize, int);
-    IMPLEMENT_PARAM_CLASS(ApertureSize, int);
-}
-
-PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderType, BlockSize, ApertureSize)
-{
-    cv::gpu::DeviceInfo devInfo;
-    int type;
-    int borderType;
-    int blockSize;
-    int apertureSize;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        type = GET_PARAM(1);
-        borderType = GET_PARAM(2);
-        blockSize = GET_PARAM(3);
-        apertureSize = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(CornerHarris, Accuracy)
-{
-    cv::Mat src = readImageType("stereobm/aloe-L.png", type);
-    ASSERT_FALSE(src.empty());
-
-    double k = randomDouble(0.1, 0.9);
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::cornerHarris(loadMat(src), dst, blockSize, apertureSize, k, borderType);
-
-    cv::Mat dst_gold;
-    cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.02);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerHarris, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
-    testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)),
-    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
-    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// cornerMinEigen
-
-PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, BorderType, BlockSize, ApertureSize)
-{
-    cv::gpu::DeviceInfo devInfo;
-    int type;
-    int borderType;
-    int blockSize;
-    int apertureSize;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        type = GET_PARAM(1);
-        borderType = GET_PARAM(2);
-        blockSize = GET_PARAM(3);
-        apertureSize = GET_PARAM(4);
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(CornerMinEigen, Accuracy)
-{
-    cv::Mat src = readImageType("stereobm/aloe-L.png", type);
-    ASSERT_FALSE(src.empty());
-
-    cv::gpu::GpuMat dst;
-    cv::gpu::cornerMinEigenVal(loadMat(src), dst, blockSize, apertureSize, borderType);
-
-    cv::Mat dst_gold;
-    cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.02);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerMinEigen, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
-    testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)),
-    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
-    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
-
-////////////////////////////////////////////////////////
-// BilateralFilter
-
-PARAM_TEST_CASE(BilateralFilter, cv::gpu::DeviceInfo, cv::Size, MatType)
-{
-    cv::gpu::DeviceInfo devInfo;
-    cv::Size size;
-    int type;
-    int kernel_size;
-    float sigma_color;
-    float sigma_spatial;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
-        type = GET_PARAM(2);
-
-        kernel_size = 5;
-        sigma_color = 10.f;
-        sigma_spatial = 3.5f;
-
-        cv::gpu::setDevice(devInfo.deviceID());
-    }
-};
-
-GPU_TEST_P(BilateralFilter, Accuracy)
-{
-    cv::Mat src = randomMat(size, type);
-
-    src.convertTo(src, type);
-    cv::gpu::GpuMat dst;
-
-    cv::gpu::bilateralFilter(loadMat(src), dst, kernel_size, sigma_color, sigma_spatial);
-
-    cv::Mat dst_gold;
-    cv::bilateralFilter(src, dst_gold, kernel_size, sigma_color, sigma_spatial);
-
-    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, BilateralFilter, testing::Combine(
-    ALL_DEVICES,
-    testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(639, 481)),
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_32FC1), MatType(CV_32FC3))
-    ));
-
-#endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_match_template.cpp b/modules/gpuimgproc/test/test_match_template.cpp
new file mode 100644
index 000000000..d18757923
--- /dev/null
+++ b/modules/gpuimgproc/test/test_match_template.cpp
@@ -0,0 +1,305 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate8U
+
+CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED) // test-parameter type over the six cv::TM_* matching methods
+#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_CCOEFF_NORMED))
+
+namespace // file-local parameter wrapper types
+{
+    IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size); // used below as TemplateSize(cv::Size(w, h)) and read back into a cv::Size in SetUp
+}
+
+PARAM_TEST_CASE(MatchTemplate8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod) // fixture for the 8-bit matchTemplate accuracy test
+{
+    cv::gpu::DeviceInfo devInfo; // GPU the test runs on
+    cv::Size size;               // size of the random source image
+    cv::Size templ_size;         // size of the random template
+    int cn;                      // channel count used for both image and template
+    int method;                  // cv::TM_* matching method
+
+    virtual void SetUp() // unpacks the parameter tuple in declaration order, then selects the device
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        templ_size = GET_PARAM(2);
+        cn = GET_PARAM(3);
+        method = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MatchTemplate8U, Accuracy) // GPU result must agree with cv::matchTemplate on random CV_8U inputs
+{
+    cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
+    cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::matchTemplate(loadMat(image), loadMat(templ), dst, method);
+
+    cv::Mat dst_gold; // CPU reference result
+    cv::matchTemplate(image, templ, dst_gold, method);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, templ_size.area() * 1e-1); // allowed error grows with the template area
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    ALL_TEMPLATE_METHODS)); // all six matching methods are exercised for 8-bit input
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate32F
+
+PARAM_TEST_CASE(MatchTemplate32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod) // fixture for the 32-bit float matchTemplate test
+{
+    cv::gpu::DeviceInfo devInfo; // GPU the test runs on
+    cv::Size size;               // size of the random source image
+    cv::Size templ_size;         // size of the random template
+    int cn;                      // channel count used for both image and template
+    int method;                  // cv::TM_* matching method
+
+    int n, m, h, w; // NOTE(review): never assigned in SetUp nor read by the Regression test below - appear to be unused
+
+    virtual void SetUp() // unpacks the parameter tuple in declaration order, then selects the device
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+        templ_size = GET_PARAM(2);
+        cn = GET_PARAM(3);
+        method = GET_PARAM(4);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MatchTemplate32F, Regression) // GPU result must agree with cv::matchTemplate on random CV_32F inputs
+{
+    cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
+    cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::matchTemplate(loadMat(image), loadMat(templ), dst, method);
+
+    cv::Mat dst_gold; // CPU reference result
+    cv::matchTemplate(image, templ, dst_gold, method);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, templ_size.area() * 1e-1); // allowed error grows with the template area
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); // only TM_SQDIFF and TM_CCORR are exercised for float input
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplateBlackSource
+
+PARAM_TEST_CASE(MatchTemplateBlackSource, cv::gpu::DeviceInfo, TemplateMethod) // fixture: device and matching method only; images are loaded in the test body
+{
+    cv::gpu::DeviceInfo devInfo; // GPU the test runs on
+    int method;                  // cv::TM_* matching method
+
+    virtual void SetUp() // unpacks the parameter tuple, then selects the device
+    {
+        devInfo = GET_PARAM(0);
+        method = GET_PARAM(1);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MatchTemplateBlackSource, Accuracy) // checks where the GPU match maximum lands for black.png vs cat.png
+{
+    cv::Mat image = readImage("matchtemplate/black.png");
+    ASSERT_FALSE(image.empty()); // stop here if the image came back empty
+
+    cv::Mat pattern = readImage("matchtemplate/cat.png");
+    ASSERT_FALSE(pattern.empty()); // stop here if the pattern came back empty
+
+    cv::gpu::GpuMat d_dst;
+    cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), d_dst, method);
+
+    cv::Mat dst(d_dst); // GPU result converted to a host matrix for minMaxLoc
+
+    double maxValue;
+    cv::Point maxLoc;
+    cv::minMaxLoc(dst, NULL, &maxValue, NULL, &maxLoc); // only the maximum and its location are requested
+
+    cv::Point maxLocGold = cv::Point(284, 12); // hard-coded expected location; no CPU reference is computed here
+
+    ASSERT_EQ(maxLocGold, maxLoc);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplateBlackSource, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(TemplateMethod(cv::TM_CCOEFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED)))); // only the two listed normalized methods are run
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate_CCOEF_NORMED
+
+PARAM_TEST_CASE(MatchTemplate_CCOEF_NORMED, cv::gpu::DeviceInfo, std::pair<std::string, std::string>) // fixture: device plus an (image, pattern) pair of names
+{
+    cv::gpu::DeviceInfo devInfo; // GPU the test runs on
+    std::string imageName;       // first element of the name pair
+    std::string patternName;     // second element of the name pair
+
+    virtual void SetUp() // unpacks the parameter tuple, then selects the device
+    {
+        devInfo = GET_PARAM(0);
+        imageName = GET_PARAM(1).first;
+        patternName = GET_PARAM(1).second;
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy) // GPU and CPU TM_CCOEFF_NORMED must place their extrema at the same locations
+{
+    cv::Mat image = readImage(imageName);
+    ASSERT_FALSE(image.empty()); // stop here if the image came back empty
+
+    cv::Mat pattern = readImage(patternName);
+    ASSERT_FALSE(pattern.empty()); // stop here if the pattern came back empty
+
+    cv::gpu::GpuMat d_dst;
+    cv::gpu::matchTemplate(loadMat(image), loadMat(pattern), d_dst, cv::TM_CCOEFF_NORMED);
+
+    cv::Mat dst(d_dst); // GPU result converted to a host matrix for minMaxLoc
+
+    cv::Point minLoc, maxLoc;
+    double minVal, maxVal;
+    cv::minMaxLoc(dst, &minVal, &maxVal, &minLoc, &maxLoc);
+
+    cv::Mat dstGold; // CPU reference result
+    cv::matchTemplate(image, pattern, dstGold, cv::TM_CCOEFF_NORMED);
+
+    double minValGold, maxValGold; // filled by minMaxLoc below but not compared against the GPU values
+    cv::Point minLocGold, maxLocGold;
+    cv::minMaxLoc(dstGold, &minValGold, &maxValGold, &minLocGold, &maxLocGold);
+
+    ASSERT_EQ(minLocGold, minLoc);
+    ASSERT_EQ(maxLocGold, maxLoc);
+    ASSERT_LE(maxVal, 1.0);  // GPU values are only range-checked to [-1, 1]
+    ASSERT_GE(minVal, -1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png"))))); // a single (image, pattern) pair
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate_CanFindBigTemplate
+
+struct MatchTemplate_CanFindBigTemplate : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture parameterized by device only
+{
+    cv::gpu::DeviceInfo devInfo; // GPU the test runs on
+
+    virtual void SetUp() // reads the device parameter and selects that device
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF_NORMED) // the TM_SQDIFF_NORMED minimum must be near zero and sit at (344, 0)
+{
+    cv::Mat scene = readImage("matchtemplate/scene.png");
+    ASSERT_FALSE(scene.empty()); // stop here if the scene came back empty
+
+    cv::Mat templ = readImage("matchtemplate/template.png");
+    ASSERT_FALSE(templ.empty()); // stop here if the template came back empty
+
+    cv::gpu::GpuMat d_result;
+    cv::gpu::matchTemplate(loadMat(scene), loadMat(templ), d_result, cv::TM_SQDIFF_NORMED);
+
+    cv::Mat result(d_result); // GPU result converted to a host matrix for minMaxLoc
+
+    double minVal;
+    cv::Point minLoc;
+    cv::minMaxLoc(result, &minVal, 0, &minLoc, 0); // 0 = maximum value / location not requested
+
+    ASSERT_GE(minVal, 0);
+    ASSERT_LT(minVal, 1e-3);
+    ASSERT_EQ(344, minLoc.x); // hard-coded expected location
+    ASSERT_EQ(0, minLoc.y);
+}
+
+GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF) // the TM_SQDIFF minimum must be non-negative and sit at (344, 0)
+{
+    cv::Mat scene = readImage("matchtemplate/scene.png");
+    ASSERT_FALSE(scene.empty()); // stop here if the scene came back empty
+
+    cv::Mat templ = readImage("matchtemplate/template.png");
+    ASSERT_FALSE(templ.empty()); // stop here if the template came back empty
+
+    cv::gpu::GpuMat d_result;
+    cv::gpu::matchTemplate(loadMat(scene), loadMat(templ), d_result, cv::TM_SQDIFF);
+
+    cv::Mat result(d_result); // GPU result converted to a host matrix for minMaxLoc
+
+    double minVal;
+    cv::Point minLoc;
+    cv::minMaxLoc(result, &minVal, 0, &minLoc, 0); // 0 = maximum value / location not requested
+
+    ASSERT_GE(minVal, 0); // unlike the SQDIFF_NORMED test, no upper bound is placed on the minimum
+    ASSERT_EQ(344, minLoc.x); // hard-coded expected location
+    ASSERT_EQ(0, minLoc.y);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate_CanFindBigTemplate, ALL_DEVICES); // covers both tests of this fixture
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuimgproc/test/test_mean_shift.cpp b/modules/gpuimgproc/test/test_mean_shift.cpp
new file mode 100644
index 000000000..e9101802b
--- /dev/null
+++ b/modules/gpuimgproc/test/test_mean_shift.cpp
@@ -0,0 +1,174 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace cvtest;
+
+////////////////////////////////////////////////////////////////////////////////
+// MeanShift
+
+struct MeanShift : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture shared by the Filtering and Proc tests
+{
+    cv::gpu::DeviceInfo devInfo; // GPU the test runs on
+
+    cv::Mat img; // meanshift/cones.png read via readImageType(..., CV_8UC4)
+
+    int spatialRad; // set to 30 in SetUp
+    int colorRad;   // set to 30 in SetUp
+
+    virtual void SetUp() // selects the device, loads the input image and fixes both radii
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        img = readImageType("meanshift/cones.png", CV_8UC4);
+        ASSERT_FALSE(img.empty()); // stop here if the image came back empty
+
+        spatialRad = 30;
+        colorRad = 30;
+    }
+};
+
+GPU_TEST_P(MeanShift, Filtering) // meanShiftFiltering output must equal a stored reference image exactly
+{
+    cv::Mat img_template; // reference image; which file is used depends on FEATURE_SET_COMPUTE_20 support
+    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+        img_template = readImage("meanshift/con_result.png");
+    else
+        img_template = readImage("meanshift/con_result_CC1X.png");
+    ASSERT_FALSE(img_template.empty()); // stop here if the reference came back empty
+
+    cv::gpu::GpuMat d_dst;
+    cv::gpu::meanShiftFiltering(loadMat(img), d_dst, spatialRad, colorRad);
+
+    ASSERT_EQ(CV_8UC4, d_dst.type());
+
+    cv::Mat dst(d_dst); // GPU result converted to a host matrix
+
+    cv::Mat result;
+    cv::cvtColor(dst, result, cv::COLOR_BGRA2BGR); // drop alpha before comparing - presumably the reference is 3-channel; TODO confirm
+
+    EXPECT_MAT_NEAR(img_template, result, 0.0); // zero tolerance
+}
+
+GPU_TEST_P(MeanShift, Proc) // meanShiftProc must reproduce meanShiftFiltering's image and a stored "spmap" matrix
+{
+    cv::FileStorage fs; // reference data; which YAML is opened depends on FEATURE_SET_COMPUTE_20 support
+    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+        fs.open(std::string(cvtest::TS::ptr()->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);
+    else
+        fs.open(std::string(cvtest::TS::ptr()->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);
+    ASSERT_TRUE(fs.isOpened());
+
+    cv::Mat spmap_template; // expected second output of meanShiftProc
+    fs["spmap"] >> spmap_template;
+    ASSERT_FALSE(spmap_template.empty());
+
+    cv::gpu::GpuMat rmap_filtered; // meanShiftFiltering result, used as the reference for rmap
+    cv::gpu::meanShiftFiltering(loadMat(img), rmap_filtered, spatialRad, colorRad);
+
+    cv::gpu::GpuMat rmap;
+    cv::gpu::GpuMat spmap;
+    cv::gpu::meanShiftProc(loadMat(img), rmap, spmap, spatialRad, colorRad);
+
+    ASSERT_EQ(CV_8UC4, rmap.type());
+
+    EXPECT_MAT_NEAR(rmap_filtered, rmap, 0.0);   // zero tolerance
+    EXPECT_MAT_NEAR(spmap_template, spmap, 0.0); // zero tolerance
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MeanShift, ALL_DEVICES); // covers both Filtering and Proc
+
+////////////////////////////////////////////////////////////////////////////////
+// MeanShiftSegmentation
+
+namespace // file-local parameter wrapper types
+{
+    IMPLEMENT_PARAM_CLASS(MinSize, int); // used below as MinSize(n) and read back into an int in SetUp
+}
+
+PARAM_TEST_CASE(MeanShiftSegmentation, cv::gpu::DeviceInfo, MinSize) // fixture: device and the minsize argument
+{
+    cv::gpu::DeviceInfo devInfo; // GPU the test runs on
+    int minsize;                 // passed to meanShiftSegmentation and embedded in the reference file name
+
+    virtual void SetUp() // unpacks the parameter tuple, then selects the device
+    {
+        devInfo = GET_PARAM(0);
+        minsize = GET_PARAM(1);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+GPU_TEST_P(MeanShiftSegmentation, Regression) // segmentation output must be similar to a stored per-minsize reference image
+{
+    cv::Mat img = readImageType("meanshift/cones.png", CV_8UC4);
+    ASSERT_FALSE(img.empty()); // stop here if the image came back empty
+
+    std::ostringstream path; // reference name: base + minsize + suffix chosen by FEATURE_SET_COMPUTE_20 support
+    path << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize;
+    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+        path << ".png";
+    else
+        path << "_CC1X.png";
+    cv::Mat dst_gold = readImage(path.str());
+    ASSERT_FALSE(dst_gold.empty()); // stop here if the reference came back empty
+
+    cv::Mat dst; // host matrix: this call writes its result to a cv::Mat, not a GpuMat
+    cv::gpu::meanShiftSegmentation(loadMat(img), dst, 10, 10, minsize); // 10, 10 match the sp10/sr10 in the reference file name
+
+    cv::Mat dst_rgb;
+    cv::cvtColor(dst, dst_rgb, cv::COLOR_BGRA2BGR); // drop alpha before comparing - presumably the reference is 3-channel; TODO confirm
+
+    EXPECT_MAT_SIMILAR(dst_gold, dst_rgb, 1e-3);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MeanShiftSegmentation, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MinSize(0), MinSize(4), MinSize(20), MinSize(84), MinSize(340), MinSize(1364)))); // one stored reference image per minsize value
+
+#endif // HAVE_CUDA

From f531dd839c456c7ad5c7e345c20a539f40b7253f Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:46:51 +0400
Subject: [PATCH 36/49] gpubgsegm module fixes

---
 modules/gpubgsegm/CMakeLists.txt              |   4 +-
 .../gpubgsegm/doc/background_segmentation.rst | 291 +-----------------
 modules/gpubgsegm/doc/gpubgsegm.rst           |   6 +-
 .../gpubgsegm/include/opencv2/gpubgsegm.hpp   |   4 +
 .../perf/{perf_bgfg.cpp => perf_bgsegm.cpp}   |  11 +-
 modules/gpubgsegm/perf/perf_main.cpp          |   2 +-
 modules/gpubgsegm/perf/perf_precomp.hpp       |   2 -
 .../src/cuda/{fgd_bgfg.cu => fgd.cu}          |   2 +-
 .../src/cuda/{fgd_bgfg_common.hpp => fgd.hpp} |   0
 .../src/cuda/{bgfg_gmg.cu => gmg.cu}          |   0
 .../src/cuda/{bgfg_mog.cu => mog.cu}          |   0
 .../gpubgsegm/src/{fgd_bgfg.cpp => fgd.cpp}   |   2 +-
 .../gpubgsegm/src/{bgfg_gmg.cpp => gmg.cpp}   |   0
 .../gpubgsegm/src/{bgfg_mog.cpp => mog.cpp}   |   0
 modules/gpubgsegm/src/precomp.hpp             |  13 -
 .../test/{test_bgfg.cpp => test_bgsegm.cpp}   |   8 +-
 modules/gpubgsegm/test/test_precomp.hpp       |   2 -
 17 files changed, 24 insertions(+), 323 deletions(-)
 rename modules/gpubgsegm/perf/{perf_bgfg.cpp => perf_bgsegm.cpp} (98%)
 rename modules/gpubgsegm/src/cuda/{fgd_bgfg.cu => fgd.cu} (99%)
 rename modules/gpubgsegm/src/cuda/{fgd_bgfg_common.hpp => fgd.hpp} (100%)
 rename modules/gpubgsegm/src/cuda/{bgfg_gmg.cu => gmg.cu} (100%)
 rename modules/gpubgsegm/src/cuda/{bgfg_mog.cu => mog.cu} (100%)
 rename modules/gpubgsegm/src/{fgd_bgfg.cpp => fgd.cpp} (99%)
 rename modules/gpubgsegm/src/{bgfg_gmg.cpp => gmg.cpp} (100%)
 rename modules/gpubgsegm/src/{bgfg_mog.cpp => mog.cpp} (100%)
 rename modules/gpubgsegm/test/{test_bgfg.cpp => test_bgsegm.cpp} (97%)

diff --git a/modules/gpubgsegm/CMakeLists.txt b/modules/gpubgsegm/CMakeLists.txt
index 4e3266b7e..f37ec595c 100644
--- a/modules/gpubgsegm/CMakeLists.txt
+++ b/modules/gpubgsegm/CMakeLists.txt
@@ -4,6 +4,6 @@ endif()
 
 set(the_description "GPU-accelerated Background Segmentation")
 
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)
 
-ocv_define_module(gpubgsegm opencv_video opencv_legacy opencv_gpufilters opencv_gpuimgproc)
+ocv_define_module(gpubgsegm opencv_video opencv_imgproc opencv_legacy opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc)
diff --git a/modules/gpubgsegm/doc/background_segmentation.rst b/modules/gpubgsegm/doc/background_segmentation.rst
index bb7c8263e..f892ee666 100644
--- a/modules/gpubgsegm/doc/background_segmentation.rst
+++ b/modules/gpubgsegm/doc/background_segmentation.rst
@@ -1,296 +1,10 @@
-Video Analysis
-==============
+Background Segmentation
+=======================
 
 .. highlight:: cpp
 
 
 
-gpu::BroxOpticalFlow
---------------------
-.. ocv:class:: gpu::BroxOpticalFlow
-
-Class computing the optical flow for two images using Brox et al Optical Flow algorithm ([Brox2004]_). ::
-
-    class BroxOpticalFlow
-    {
-    public:
-        BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_);
-
-        //! Compute optical flow
-        //! frame0 - source frame (supports only CV_32FC1 type)
-        //! frame1 - frame to track (with the same size and type as frame0)
-        //! u      - flow horizontal component (along x axis)
-        //! v      - flow vertical component (along y axis)
-        void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
-
-        //! flow smoothness
-        float alpha;
-
-        //! gradient constancy importance
-        float gamma;
-
-        //! pyramid scale factor
-        float scale_factor;
-
-        //! number of lagged non-linearity iterations (inner loop)
-        int inner_iterations;
-
-        //! number of warping iterations (number of pyramid levels)
-        int outer_iterations;
-
-        //! number of linear system solver iterations
-        int solver_iterations;
-
-        GpuMat buf;
-    };
-
-
-
-gpu::GoodFeaturesToTrackDetector_GPU
-------------------------------------
-.. ocv:class:: gpu::GoodFeaturesToTrackDetector_GPU
-
-Class used for strong corners detection on an image. ::
-
-    class GoodFeaturesToTrackDetector_GPU
-    {
-    public:
-        explicit GoodFeaturesToTrackDetector_GPU(int maxCorners_ = 1000, double qualityLevel_ = 0.01, double minDistance_ = 0.0,
-            int blockSize_ = 3, bool useHarrisDetector_ = false, double harrisK_ = 0.04);
-
-        void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
-
-        int maxCorners;
-        double qualityLevel;
-        double minDistance;
-
-        int blockSize;
-        bool useHarrisDetector;
-        double harrisK;
-
-        void releaseMemory();
-    };
-
-The class finds the most prominent corners in the image.
-
-.. seealso:: :ocv:func:`goodFeaturesToTrack`
-
-
-
-gpu::GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU
----------------------------------------------------------------------
-Constructor.
-
-.. ocv:function:: gpu::GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04)
-
-    :param maxCorners: Maximum number of corners to return. If there are more corners than are found, the strongest of them is returned.
-
-    :param qualityLevel: Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see  :ocv:func:`gpu::cornerMinEigenVal` ) or the Harris function response (see  :ocv:func:`gpu::cornerHarris` ). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the  ``qualityLevel=0.01`` , then all the corners with the quality measure less than 15 are rejected.
-
-    :param minDistance: Minimum possible Euclidean distance between the returned corners.
-
-    :param blockSize: Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See  :ocv:func:`cornerEigenValsAndVecs` .
-
-    :param useHarrisDetector: Parameter indicating whether to use a Harris detector (see :ocv:func:`gpu::cornerHarris`) or :ocv:func:`gpu::cornerMinEigenVal`.
-
-    :param harrisK: Free parameter of the Harris detector.
-
-
-
-gpu::GoodFeaturesToTrackDetector_GPU::operator ()
--------------------------------------------------
-Finds the most prominent corners in the image.
-
-.. ocv:function:: void gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat())
-
-    :param image: Input 8-bit, single-channel image.
-
-    :param corners: Output vector of detected corners (it will be one row matrix with CV_32FC2 type).
-
-    :param mask: Optional region of interest. If the image is not empty (it needs to have the type  ``CV_8UC1``  and the same size as  ``image`` ), it  specifies the region in which the corners are detected.
-
-.. seealso:: :ocv:func:`goodFeaturesToTrack`
-
-
-
-gpu::GoodFeaturesToTrackDetector_GPU::releaseMemory
----------------------------------------------------
-Releases inner buffers memory.
-
-.. ocv:function:: void gpu::GoodFeaturesToTrackDetector_GPU::releaseMemory()
-
-
-
-gpu::FarnebackOpticalFlow
--------------------------
-.. ocv:class:: gpu::FarnebackOpticalFlow
-
-Class computing a dense optical flow using the Gunnar Farneback’s algorithm. ::
-
-    class CV_EXPORTS FarnebackOpticalFlow
-    {
-    public:
-        FarnebackOpticalFlow()
-        {
-            numLevels = 5;
-            pyrScale = 0.5;
-            fastPyramids = false;
-            winSize = 13;
-            numIters = 10;
-            polyN = 5;
-            polySigma = 1.1;
-            flags = 0;
-        }
-
-        int numLevels;
-        double pyrScale;
-        bool fastPyramids;
-        int winSize;
-        int numIters;
-        int polyN;
-        double polySigma;
-        int flags;
-
-        void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
-
-        void releaseMemory();
-
-    private:
-        /* hidden */
-    };
-
-
-
-gpu::FarnebackOpticalFlow::operator ()
---------------------------------------
-Computes a dense optical flow using the Gunnar Farneback’s algorithm.
-
-.. ocv:function:: void gpu::FarnebackOpticalFlow::operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null())
-
-    :param frame0: First 8-bit gray-scale input image
-    :param frame1: Second 8-bit gray-scale input image
-    :param flowx: Flow horizontal component
-    :param flowy: Flow vertical component
-    :param s: Stream
-
-.. seealso:: :ocv:func:`calcOpticalFlowFarneback`
-
-
-
-gpu::FarnebackOpticalFlow::releaseMemory
-----------------------------------------
-Releases unused auxiliary memory buffers.
-
-.. ocv:function:: void gpu::FarnebackOpticalFlow::releaseMemory()
-
-
-
-gpu::PyrLKOpticalFlow
----------------------
-.. ocv:class:: gpu::PyrLKOpticalFlow
-
-Class used for calculating an optical flow. ::
-
-    class PyrLKOpticalFlow
-    {
-    public:
-        PyrLKOpticalFlow();
-
-        void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
-            GpuMat& status, GpuMat* err = 0);
-
-        void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
-
-        Size winSize;
-        int maxLevel;
-        int iters;
-        bool useInitialFlow;
-
-        void releaseMemory();
-    };
-
-The class can calculate an optical flow for a sparse feature set or dense optical flow using the iterative Lucas-Kanade method with pyramids.
-
-.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
-
-
-
-gpu::PyrLKOpticalFlow::sparse
------------------------------
-Calculate an optical flow for a sparse feature set.
-
-.. ocv:function:: void gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err = 0)
-
-    :param prevImg: First 8-bit input image (supports both grayscale and color images).
-
-    :param nextImg: Second input image of the same size and the same type as  ``prevImg`` .
-
-    :param prevPts: Vector of 2D points for which the flow needs to be found. It must be one row matrix with CV_32FC2 type.
-
-    :param nextPts: Output vector of 2D points (with single-precision floating-point coordinates) containing the calculated new positions of input features in the second image. When ``useInitialFlow`` is true, the vector must have the same size as in the input.
-
-    :param status: Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the flow for the corresponding features has been found. Otherwise, it is set to 0.
-
-    :param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
-
-.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
-
-
-
-gpu::PyrLKOpticalFlow::dense
------------------------------
-Calculate dense optical flow.
-
-.. ocv:function:: void gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0)
-
-    :param prevImg: First 8-bit grayscale input image.
-
-    :param nextImg: Second input image of the same size and the same type as  ``prevImg`` .
-
-    :param u: Horizontal component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
-
-    :param v: Vertical component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
-
-    :param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
-
-
-
-gpu::PyrLKOpticalFlow::releaseMemory
-------------------------------------
-Releases inner buffers memory.
-
-.. ocv:function:: void gpu::PyrLKOpticalFlow::releaseMemory()
-
-
-
-gpu::interpolateFrames
-----------------------
-Interpolates frames (images) using provided optical flow (displacement field).
-
-.. ocv:function:: void gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv, float pos, GpuMat& newFrame, GpuMat& buf, Stream& stream = Stream::Null())
-
-    :param frame0: First frame (32-bit floating point images, single channel).
-
-    :param frame1: Second frame. Must have the same type and size as ``frame0`` .
-
-    :param fu: Forward horizontal displacement.
-
-    :param fv: Forward vertical displacement.
-
-    :param bu: Backward horizontal displacement.
-
-    :param bv: Backward vertical displacement.
-
-    :param pos: New frame position.
-
-    :param newFrame: Output image.
-
-    :param buf: Temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat: occlusion masks for first frame, occlusion masks for second, interpolated forward horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, interpolated backward vertical flow.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
 gpu::FGDStatModel
 -----------------
 .. ocv:class:: gpu::FGDStatModel
@@ -687,7 +401,6 @@ Releases all inner buffer's memory.
 
 
 
-.. [Brox2004] T. Brox, A. Bruhn, N. Papenberg, J. Weickert. *High accuracy optical flow estimation based on a theory for warping*. ECCV 2004.
 .. [FGD2003] Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian. *Foreground Object Detection from Videos Containing Complex Background*. ACM MM2003 9p, 2003.
 .. [MOG2001] P. KadewTraKuPong and R. Bowden. *An improved adaptive background mixture model for real-time tracking with shadow detection*. Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001
 .. [MOG2004] Z. Zivkovic. *Improved adaptive Gausian mixture model for background subtraction*. International Conference Pattern Recognition, UK, August, 2004
diff --git a/modules/gpubgsegm/doc/gpubgsegm.rst b/modules/gpubgsegm/doc/gpubgsegm.rst
index f4988cb21..ffac8b39c 100644
--- a/modules/gpubgsegm/doc/gpubgsegm.rst
+++ b/modules/gpubgsegm/doc/gpubgsegm.rst
@@ -1,6 +1,6 @@
-********************************************
-gpu. GPU-accelerated Background Segmentation
-********************************************
+**************************************************
+gpubgsegm. GPU-accelerated Background Segmentation
+**************************************************
 
 .. toctree::
     :maxdepth: 1
diff --git a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
index 62d4d9a4c..fbb53f788 100644
--- a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
+++ b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
@@ -43,6 +43,10 @@
 #ifndef __OPENCV_GPUBGSEGM_HPP__
 #define __OPENCV_GPUBGSEGM_HPP__
 
+#ifndef __cplusplus
+#  error gpubgsegm.hpp header must be compiled as C++
+#endif
+
 #include <memory>
 
 #include "opencv2/core/gpumat.hpp"
diff --git a/modules/gpubgsegm/perf/perf_bgfg.cpp b/modules/gpubgsegm/perf/perf_bgsegm.cpp
similarity index 98%
rename from modules/gpubgsegm/perf/perf_bgfg.cpp
rename to modules/gpubgsegm/perf/perf_bgsegm.cpp
index 23db3d899..15842d59b 100644
--- a/modules/gpubgsegm/perf/perf_bgfg.cpp
+++ b/modules/gpubgsegm/perf/perf_bgsegm.cpp
@@ -41,6 +41,7 @@
 //M*/
 
 #include "perf_precomp.hpp"
+#include "opencv2/legacy.hpp"
 
 using namespace std;
 using namespace testing;
@@ -73,7 +74,7 @@ namespace cv
 
 DEF_PARAM_TEST_1(Video, string);
 
-PERF_TEST_P(Video, Video_FGDStatModel,
+PERF_TEST_P(Video, FGDStatModel,
             Values(string("gpu/video/768x576.avi")))
 {
     declare.time(60);
@@ -146,7 +147,7 @@ PERF_TEST_P(Video, Video_FGDStatModel,
 
 DEF_PARAM_TEST(Video_Cn_LearningRate, string, MatCn, double);
 
-PERF_TEST_P(Video_Cn_LearningRate, Video_MOG,
+PERF_TEST_P(Video_Cn_LearningRate, MOG,
             Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
                     GPU_CHANNELS_1_3_4,
                     Values(0.0, 0.01)))
@@ -245,7 +246,7 @@ PERF_TEST_P(Video_Cn_LearningRate, Video_MOG,
 
 DEF_PARAM_TEST(Video_Cn, string, int);
 
-PERF_TEST_P(Video_Cn, Video_MOG2,
+PERF_TEST_P(Video_Cn, MOG2,
             Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
                     GPU_CHANNELS_1_3_4))
 {
@@ -344,7 +345,7 @@ PERF_TEST_P(Video_Cn, Video_MOG2,
 
 #if BUILD_WITH_VIDEO_INPUT_SUPPORT
 
-PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
+PERF_TEST_P(Video_Cn, MOG2GetBackgroundImage,
             Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
                     GPU_CHANNELS_1_3_4))
 {
@@ -428,7 +429,7 @@ PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
 
 DEF_PARAM_TEST(Video_Cn_MaxFeatures, string, MatCn, int);
 
-PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG,
+PERF_TEST_P(Video_Cn_MaxFeatures, GMG,
             Combine(Values(string("gpu/video/768x576.avi")),
                     GPU_CHANNELS_1_3_4,
                     Values(20, 40, 60)))
diff --git a/modules/gpubgsegm/perf/perf_main.cpp b/modules/gpubgsegm/perf/perf_main.cpp
index b35791cda..99066f450 100644
--- a/modules/gpubgsegm/perf/perf_main.cpp
+++ b/modules/gpubgsegm/perf/perf_main.cpp
@@ -44,4 +44,4 @@
 
 using namespace perf;
 
-CV_PERF_TEST_MAIN(gpuarithm, printCudaInfo())
+CV_PERF_TEST_MAIN(gpubgsegm, printCudaInfo())
diff --git a/modules/gpubgsegm/perf/perf_precomp.hpp b/modules/gpubgsegm/perf/perf_precomp.hpp
index 3343fe30a..3a13b9200 100644
--- a/modules/gpubgsegm/perf/perf_precomp.hpp
+++ b/modules/gpubgsegm/perf/perf_precomp.hpp
@@ -55,9 +55,7 @@
 #include "opencv2/ts/gpu_perf.hpp"
 
 #include "opencv2/gpubgsegm.hpp"
-
 #include "opencv2/video.hpp"
-#include "opencv2/legacy.hpp"
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
diff --git a/modules/gpubgsegm/src/cuda/fgd_bgfg.cu b/modules/gpubgsegm/src/cuda/fgd.cu
similarity index 99%
rename from modules/gpubgsegm/src/cuda/fgd_bgfg.cu
rename to modules/gpubgsegm/src/cuda/fgd.cu
index d8c1df190..3d5519945 100644
--- a/modules/gpubgsegm/src/cuda/fgd_bgfg.cu
+++ b/modules/gpubgsegm/src/cuda/fgd.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/utility.hpp"
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/functional.hpp"
-#include "fgd_bgfg_common.hpp"
+#include "fgd.hpp"
 
 using namespace cv::gpu;
 using namespace cv::gpu::cudev;
diff --git a/modules/gpubgsegm/src/cuda/fgd_bgfg_common.hpp b/modules/gpubgsegm/src/cuda/fgd.hpp
similarity index 100%
rename from modules/gpubgsegm/src/cuda/fgd_bgfg_common.hpp
rename to modules/gpubgsegm/src/cuda/fgd.hpp
diff --git a/modules/gpubgsegm/src/cuda/bgfg_gmg.cu b/modules/gpubgsegm/src/cuda/gmg.cu
similarity index 100%
rename from modules/gpubgsegm/src/cuda/bgfg_gmg.cu
rename to modules/gpubgsegm/src/cuda/gmg.cu
diff --git a/modules/gpubgsegm/src/cuda/bgfg_mog.cu b/modules/gpubgsegm/src/cuda/mog.cu
similarity index 100%
rename from modules/gpubgsegm/src/cuda/bgfg_mog.cu
rename to modules/gpubgsegm/src/cuda/mog.cu
diff --git a/modules/gpubgsegm/src/fgd_bgfg.cpp b/modules/gpubgsegm/src/fgd.cpp
similarity index 99%
rename from modules/gpubgsegm/src/fgd_bgfg.cpp
rename to modules/gpubgsegm/src/fgd.cpp
index c0bd76532..1b4038304 100644
--- a/modules/gpubgsegm/src/fgd_bgfg.cpp
+++ b/modules/gpubgsegm/src/fgd.cpp
@@ -59,7 +59,7 @@ int cv::gpu::FGDStatModel::update(const cv::gpu::GpuMat&) { throw_no_cuda(); ret
 
 #else
 
-#include "fgd_bgfg_common.hpp"
+#include "cuda/fgd.hpp"
 #include "opencv2/imgproc/imgproc_c.h"
 
 namespace
diff --git a/modules/gpubgsegm/src/bgfg_gmg.cpp b/modules/gpubgsegm/src/gmg.cpp
similarity index 100%
rename from modules/gpubgsegm/src/bgfg_gmg.cpp
rename to modules/gpubgsegm/src/gmg.cpp
diff --git a/modules/gpubgsegm/src/bgfg_mog.cpp b/modules/gpubgsegm/src/mog.cpp
similarity index 100%
rename from modules/gpubgsegm/src/bgfg_mog.cpp
rename to modules/gpubgsegm/src/mog.cpp
diff --git a/modules/gpubgsegm/src/precomp.hpp b/modules/gpubgsegm/src/precomp.hpp
index 4a0b01267..a381857af 100644
--- a/modules/gpubgsegm/src/precomp.hpp
+++ b/modules/gpubgsegm/src/precomp.hpp
@@ -46,23 +46,10 @@
 #include <limits>
 
 #include "opencv2/gpubgsegm.hpp"
-
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpufilters.hpp"
 #include "opencv2/gpuimgproc.hpp"
-#include "opencv2/imgproc/imgproc_c.h"
-#include "opencv2/video.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
 
-#include "opencv2/opencv_modules.hpp"
-
-#ifdef HAVE_OPENCV_GPULEGACY
-#  include "opencv2/gpulegacy/private.hpp"
-#endif
-
-#ifdef HAVE_CUDA
-#  include "cuda/fgd_bgfg_common.hpp"
-#endif
-
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpubgsegm/test/test_bgfg.cpp b/modules/gpubgsegm/test/test_bgsegm.cpp
similarity index 97%
rename from modules/gpubgsegm/test/test_bgfg.cpp
rename to modules/gpubgsegm/test/test_bgsegm.cpp
index d5151fc3d..a5d187b04 100644
--- a/modules/gpubgsegm/test/test_bgfg.cpp
+++ b/modules/gpubgsegm/test/test_bgsegm.cpp
@@ -143,7 +143,7 @@ GPU_TEST_P(FGDStatModel, Update)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, FGDStatModel, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_BgSegm, FGDStatModel, testing::Combine(
     ALL_DEVICES,
     testing::Values(std::string("768x576.avi")),
     testing::Values(Channels(3), Channels(4))));
@@ -219,7 +219,7 @@ GPU_TEST_P(MOG, Update)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, MOG, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_BgSegm, MOG, testing::Combine(
     ALL_DEVICES,
     testing::Values(std::string("768x576.avi")),
     testing::Values(UseGray(true), UseGray(false)),
@@ -339,7 +339,7 @@ GPU_TEST_P(MOG2, getBackgroundImage)
     ASSERT_MAT_NEAR(background_gold, background, 0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, MOG2, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_BgSegm, MOG2, testing::Combine(
     ALL_DEVICES,
     testing::Values(std::string("768x576.avi")),
     testing::Values(UseGray(true), UseGray(false)),
@@ -395,7 +395,7 @@ GPU_TEST_P(GMG, Accuracy)
     ASSERT_MAT_NEAR(fullfg, d_fgmask, 0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, GMG, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_BgSegm, GMG, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8U), MatType(CV_16U), MatType(CV_32F)),
diff --git a/modules/gpubgsegm/test/test_precomp.hpp b/modules/gpubgsegm/test/test_precomp.hpp
index 3c6ad197b..0249f5be9 100644
--- a/modules/gpubgsegm/test/test_precomp.hpp
+++ b/modules/gpubgsegm/test/test_precomp.hpp
@@ -57,8 +57,6 @@
 #include "opencv2/ts/gpu_test.hpp"
 
 #include "opencv2/gpubgsegm.hpp"
-
 #include "opencv2/video.hpp"
-#include "opencv2/legacy.hpp"
 
 #endif

From 3156e803beeff1eb73ce2ed24b09a7f4d6317801 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:36:52 +0400
Subject: [PATCH 37/49] gpuoptflow module fixes

---
 modules/gpuoptflow/CMakeLists.txt             |   2 +-
 modules/gpuoptflow/doc/optflow.rst            |  23 +--
 .../gpuoptflow/include/opencv2/gpuoptflow.hpp |   7 +-
 modules/gpuoptflow/perf/perf_optflow.cpp      |  18 +-
 modules/gpuoptflow/perf/perf_precomp.hpp      |   2 -
 .../gpuoptflow/src/{optflowbm.cpp => bm.cpp}  |  70 ++------
 modules/gpuoptflow/src/bm_fast.cpp            |  90 ++++++++++
 .../src/{optical_flow.cpp => brox.cpp}        | 109 +----------
 modules/gpuoptflow/src/cuda/bm.cu             | 169 ++++++++++++++++++
 .../src/cuda/{optflowbm.cu => bm_fast.cu}     | 119 ------------
 ...optical_flow_farneback.cu => farneback.cu} |   0
 .../cuda/{optical_flow.cu => needle_map.cu}   |   0
 ...tical_flow_farneback.cpp => farneback.cpp} |  45 +----
 modules/gpuoptflow/src/interpolate_frames.cpp | 113 ++++++++++++
 modules/gpuoptflow/src/needle_map.cpp         | 100 +++++++++++
 modules/gpuoptflow/src/precomp.hpp            |   2 +-
 modules/gpuoptflow/src/pyrlk.cpp              |  14 +-
 modules/gpuoptflow/test/test_optflow.cpp      |  12 +-
 modules/gpuoptflow/test/test_precomp.hpp      |   2 -
 19 files changed, 526 insertions(+), 371 deletions(-)
 rename modules/gpuoptflow/src/{optflowbm.cpp => bm.cpp} (73%)
 create mode 100644 modules/gpuoptflow/src/bm_fast.cpp
 rename modules/gpuoptflow/src/{optical_flow.cpp => brox.cpp} (55%)
 create mode 100644 modules/gpuoptflow/src/cuda/bm.cu
 rename modules/gpuoptflow/src/cuda/{optflowbm.cu => bm_fast.cu} (72%)
 rename modules/gpuoptflow/src/cuda/{optical_flow_farneback.cu => farneback.cu} (100%)
 rename modules/gpuoptflow/src/cuda/{optical_flow.cu => needle_map.cu} (100%)
 rename modules/gpuoptflow/src/{optical_flow_farneback.cpp => farneback.cpp} (88%)
 create mode 100644 modules/gpuoptflow/src/interpolate_frames.cpp
 create mode 100644 modules/gpuoptflow/src/needle_map.cpp

diff --git a/modules/gpuoptflow/CMakeLists.txt b/modules/gpuoptflow/CMakeLists.txt
index 120262d25..283891bb0 100644
--- a/modules/gpuoptflow/CMakeLists.txt
+++ b/modules/gpuoptflow/CMakeLists.txt
@@ -4,6 +4,6 @@ endif()
 
 set(the_description "GPU-accelerated Optical Flow")
 
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)
 
 ocv_define_module(gpuoptflow opencv_video opencv_legacy opencv_gpuarithm opencv_gpuwarping opencv_gpuimgproc OPTIONAL opencv_gpulegacy)
diff --git a/modules/gpuoptflow/doc/optflow.rst b/modules/gpuoptflow/doc/optflow.rst
index d8f153610..5962b2b81 100644
--- a/modules/gpuoptflow/doc/optflow.rst
+++ b/modules/gpuoptflow/doc/optflow.rst
@@ -1,5 +1,5 @@
-Video Analysis
-==============
+Optical Flow
+============
 
 .. highlight:: cpp
 
@@ -46,25 +46,6 @@ Class computing the optical flow for two images using Brox et al Optical Flow al
 
 
 
-gpu::GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU
----------------------------------------------------------------------
-Constructor.
-
-.. ocv:function:: gpu::GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04)
-
-    :param maxCorners: Maximum number of corners to return. If there are more corners than are found, the strongest of them is returned.
-
-    :param qualityLevel: Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see  :ocv:func:`gpu::cornerMinEigenVal` ) or the Harris function response (see  :ocv:func:`gpu::cornerHarris` ). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the  ``qualityLevel=0.01`` , then all the corners with the quality measure less than 15 are rejected.
-
-    :param minDistance: Minimum possible Euclidean distance between the returned corners.
-
-    :param blockSize: Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See  :ocv:func:`cornerEigenValsAndVecs` .
-
-    :param useHarrisDetector: Parameter indicating whether to use a Harris detector (see :ocv:func:`gpu::cornerHarris`) or :ocv:func:`gpu::cornerMinEigenVal`.
-
-    :param harrisK: Free parameter of the Harris detector.
-
-
 gpu::FarnebackOpticalFlow
 -------------------------
 .. ocv:class:: gpu::FarnebackOpticalFlow
diff --git a/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp b/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
index 4e245195d..f47a55ea1 100644
--- a/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
+++ b/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
@@ -43,12 +43,14 @@
 #ifndef __OPENCV_GPUOPTFLOW_HPP__
 #define __OPENCV_GPUOPTFLOW_HPP__
 
+#ifndef __cplusplus
+#  error gpuoptflow.hpp header must be compiled as C++
+#endif
+
 #include "opencv2/core/gpumat.hpp"
 
 namespace cv { namespace gpu {
 
-////////////////////////////////// Optical Flow //////////////////////////////////////////
-
 class CV_EXPORTS BroxOpticalFlow
 {
 public:
@@ -282,7 +284,6 @@ private:
     GpuMat extended_I1;
 };
 
-
 //! Interpolate frames (images) using provided optical flow (displacement field).
 //! frame0   - frame 0 (32-bit floating point images, single channel)
 //! frame1   - frame 1 (the same type and size)
diff --git a/modules/gpuoptflow/perf/perf_optflow.cpp b/modules/gpuoptflow/perf/perf_optflow.cpp
index febaee5f8..6f2527fe9 100644
--- a/modules/gpuoptflow/perf/perf_optflow.cpp
+++ b/modules/gpuoptflow/perf/perf_optflow.cpp
@@ -54,7 +54,7 @@ typedef pair<string, string> pair_string;
 
 DEF_PARAM_TEST_1(ImagePair, pair_string);
 
-PERF_TEST_P(ImagePair, Video_InterpolateFrames,
+PERF_TEST_P(ImagePair, InterpolateFrames,
             Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
@@ -95,7 +95,7 @@ PERF_TEST_P(ImagePair, Video_InterpolateFrames,
 //////////////////////////////////////////////////////
 // CreateOpticalFlowNeedleMap
 
-PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
+PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap,
             Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
@@ -135,7 +135,7 @@ PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
 //////////////////////////////////////////////////////
 // BroxOpticalFlow
 
-PERF_TEST_P(ImagePair, Video_BroxOpticalFlow,
+PERF_TEST_P(ImagePair, BroxOpticalFlow,
             Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(300);
@@ -175,7 +175,7 @@ PERF_TEST_P(ImagePair, Video_BroxOpticalFlow,
 
 DEF_PARAM_TEST(ImagePair_Gray_NPts_WinSz_Levels_Iters, pair_string, bool, int, int, int, int);
 
-PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse,
+PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, PyrLKOpticalFlowSparse,
             Combine(Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
                     Bool(),
                     Values(8000),
@@ -248,7 +248,7 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse
 
 DEF_PARAM_TEST(ImagePair_WinSz_Levels_Iters, pair_string, int, int, int);
 
-PERF_TEST_P(ImagePair_WinSz_Levels_Iters, Video_PyrLKOpticalFlowDense,
+PERF_TEST_P(ImagePair_WinSz_Levels_Iters, PyrLKOpticalFlowDense,
             Combine(Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
                     Values(3, 5, 7, 9, 13, 17, 21),
                     Values(1, 3),
@@ -293,7 +293,7 @@ PERF_TEST_P(ImagePair_WinSz_Levels_Iters, Video_PyrLKOpticalFlowDense,
 //////////////////////////////////////////////////////
 // FarnebackOpticalFlow
 
-PERF_TEST_P(ImagePair, Video_FarnebackOpticalFlow,
+PERF_TEST_P(ImagePair, FarnebackOpticalFlow,
             Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(10);
@@ -346,7 +346,7 @@ PERF_TEST_P(ImagePair, Video_FarnebackOpticalFlow,
 //////////////////////////////////////////////////////
 // OpticalFlowDual_TVL1
 
-PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
+PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
             Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(20);
@@ -407,7 +407,7 @@ void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
     cvCalcOpticalFlowBM(&cvprev, &cvcurr, bSize, shiftSize, maxRange, usePrevious, &cvvelx, &cvvely);
 }
 
-PERF_TEST_P(ImagePair, Video_OpticalFlowBM,
+PERF_TEST_P(ImagePair, OpticalFlowBM,
             Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(400);
@@ -444,7 +444,7 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowBM,
     }
 }
 
-PERF_TEST_P(ImagePair, Video_FastOpticalFlowBM,
+PERF_TEST_P(ImagePair, FastOpticalFlowBM,
             Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(400);
diff --git a/modules/gpuoptflow/perf/perf_precomp.hpp b/modules/gpuoptflow/perf/perf_precomp.hpp
index cdc671b79..9235b62dc 100644
--- a/modules/gpuoptflow/perf/perf_precomp.hpp
+++ b/modules/gpuoptflow/perf/perf_precomp.hpp
@@ -55,9 +55,7 @@
 #include "opencv2/ts/gpu_perf.hpp"
 
 #include "opencv2/gpuoptflow.hpp"
-
 #include "opencv2/video.hpp"
-#include "opencv2/legacy.hpp"
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
diff --git a/modules/gpuoptflow/src/optflowbm.cpp b/modules/gpuoptflow/src/bm.cpp
similarity index 73%
rename from modules/gpuoptflow/src/optflowbm.cpp
rename to modules/gpuoptflow/src/bm.cpp
index cf0e9b9a3..b8daa96b0 100644
--- a/modules/gpuoptflow/src/optflowbm.cpp
+++ b/modules/gpuoptflow/src/bm.cpp
@@ -49,8 +49,6 @@ using namespace cv::gpu;
 
 void cv::gpu::calcOpticalFlowBM(const GpuMat&, const GpuMat&, Size, Size, Size, bool, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
-
 #else // HAVE_CUDA
 
 namespace optflowbm
@@ -94,29 +92,29 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo
         // upper side
         for (int j = -i; j <= i + 1; ++j, ++ssCount)
         {
-            ss[ssCount].x = ++x;
-            ss[ssCount].y = y;
+            ss[ssCount].x = (short) ++x;
+            ss[ssCount].y = (short) y;
         }
 
         // right side
         for (int j = -i; j <= i + 1; ++j, ++ssCount)
         {
-            ss[ssCount].x = x;
-            ss[ssCount].y = ++y;
+            ss[ssCount].x = (short) x;
+            ss[ssCount].y = (short) ++y;
         }
 
         // bottom side
         for (int j = -i; j <= i + 1; ++j, ++ssCount)
         {
-            ss[ssCount].x = --x;
-            ss[ssCount].y = y;
+            ss[ssCount].x = (short) --x;
+            ss[ssCount].y = (short) y;
         }
 
         // left side
         for (int j = -i; j <= i + 1; ++j, ++ssCount)
         {
-            ss[ssCount].x = x;
-            ss[ssCount].y = --y;
+            ss[ssCount].x = (short) x;
+            ss[ssCount].y = (short) --y;
         }
     }
 
@@ -135,8 +133,8 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo
             // upper side
             for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x)
             {
-                ss[ssCount].x = x;
-                ss[ssCount].y = y;
+                ss[ssCount].x = (short) x;
+                ss[ssCount].y = (short) y;
             }
 
             x = xleft;
@@ -145,8 +143,8 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo
             // bottom side
             for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x)
             {
-                ss[ssCount].x = x;
-                ss[ssCount].y = y;
+                ss[ssCount].x = (short) x;
+                ss[ssCount].y = (short) y;
             }
         }
     }
@@ -164,8 +162,8 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo
             // left side
             for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y)
             {
-                ss[ssCount].x = x;
-                ss[ssCount].y = y;
+                ss[ssCount].x = (short) x;
+                ss[ssCount].y = (short) y;
             }
 
             y = yupper;
@@ -174,8 +172,8 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo
             // right side
             for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y)
             {
-                ss[ssCount].x = x;
-                ss[ssCount].y = y;
+                ss[ssCount].x = (short) x;
+                ss[ssCount].y = (short) y;
             }
         }
     }
@@ -203,40 +201,4 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo
                     maxX, maxY, acceptLevel, escapeLevel, buf.ptr<short2>(), ssCount, stream);
 }
 
-namespace optflowbm_fast
-{
-    void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows);
-
-    template <typename T>
-    void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);
-}
-
-void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window, int block_window, Stream& stream)
-{
-    CV_Assert( I0.type() == CV_8UC1 );
-    CV_Assert( I1.size() == I0.size() && I1.type() == I0.type() );
-
-    int border_size = search_window / 2 + block_window / 2;
-    Size esize = I0.size() + Size(border_size, border_size) * 2;
-
-    ensureSizeIsEnough(esize, I0.type(), extended_I0);
-    ensureSizeIsEnough(esize, I0.type(), extended_I1);
-
-    copyMakeBorder(I0, extended_I0, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
-    copyMakeBorder(I1, extended_I1, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
-
-    GpuMat I0_hdr = extended_I0(Rect(Point2i(border_size, border_size), I0.size()));
-    GpuMat I1_hdr = extended_I1(Rect(Point2i(border_size, border_size), I0.size()));
-
-    int bcols, brows;
-    optflowbm_fast::get_buffer_size(I0.cols, I0.rows, search_window, block_window, bcols, brows);
-
-    ensureSizeIsEnough(brows, bcols, CV_32SC1, buffer);
-
-    flowx.create(I0.size(), CV_32FC1);
-    flowy.create(I0.size(), CV_32FC1);
-
-    optflowbm_fast::calc<uchar>(I0_hdr, I1_hdr, flowx, flowy, buffer, search_window, block_window, StreamAccessor::getStream(stream));
-}
-
 #endif // HAVE_CUDA
diff --git a/modules/gpuoptflow/src/bm_fast.cpp b/modules/gpuoptflow/src/bm_fast.cpp
new file mode 100644
index 000000000..edab65386
--- /dev/null
+++ b/modules/gpuoptflow/src/bm_fast.cpp
@@ -0,0 +1,90 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+
+void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); } // stub for builds without usable CUDA: ignores all arguments and only calls throw_no_cuda()
+
+#else // HAVE_CUDA
+
+namespace optflowbm_fast // declarations only; the definitions are not in this file (presumably in src/cuda/bm_fast.cu -- confirm)
+{
+    void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows); // results are returned through buffer_cols / buffer_rows
+    // Called below as calc<uchar>; `buffer` is the scratch area whose dimensions come from get_buffer_size().
+    template <typename T>
+    void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);
+}
+
+void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window, int block_window, Stream& stream)
+{   // Pads both input images, sizes the scratch and output buffers, then runs optflowbm_fast::calc<uchar> with the given stream.
+    CV_Assert( I0.type() == CV_8UC1 ); // only single-channel 8-bit input is accepted
+    CV_Assert( I1.size() == I0.size() && I1.type() == I0.type() ); // I1 must match I0 in size and type
+    // Border width: half the search window plus half the block window (integer division).
+    int border_size = search_window / 2 + block_window / 2;
+    Size esize = I0.size() + Size(border_size, border_size) * 2; // input size grown by border_size on every side
+    // extended_I0 / extended_I1 are not declared in this function (presumably class members reused between calls -- confirm in gpuoptflow.hpp).
+    ensureSizeIsEnough(esize, I0.type(), extended_I0);
+    ensureSizeIsEnough(esize, I0.type(), extended_I1);
+    // Copy each image into its padded buffer with cv::BORDER_DEFAULT; `stream` is forwarded to the call.
+    gpu::copyMakeBorder(I0, extended_I0, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
+    gpu::copyMakeBorder(I1, extended_I1, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
+    // Views into the padded buffers that start at (border_size, border_size) and have the original image size.
+    GpuMat I0_hdr = extended_I0(Rect(Point2i(border_size, border_size), I0.size()));
+    GpuMat I1_hdr = extended_I1(Rect(Point2i(border_size, border_size), I0.size()));
+    // Query the scratch-buffer dimensions; bcols / brows are filled through the reference parameters.
+    int bcols, brows;
+    optflowbm_fast::get_buffer_size(I0.cols, I0.rows, search_window, block_window, bcols, brows);
+    // `buffer` is requested as CV_32SC1 and later handed to calc() through its PtrStepi parameter.
+    ensureSizeIsEnough(brows, bcols, CV_32SC1, buffer);
+    // Outputs are created as single-channel 32-bit float matrices of the input size.
+    flowx.create(I0.size(), CV_32FC1);
+    flowy.create(I0.size(), CV_32FC1);
+    // calc() takes a cudaStream_t, obtained here from the Stream wrapper via StreamAccessor::getStream.
+    optflowbm_fast::calc<uchar>(I0_hdr, I1_hdr, flowx, flowy, buffer, search_window, block_window, StreamAccessor::getStream(stream));
+}
+
+#endif // HAVE_CUDA
diff --git a/modules/gpuoptflow/src/optical_flow.cpp b/modules/gpuoptflow/src/brox.cpp
similarity index 55%
rename from modules/gpuoptflow/src/optical_flow.cpp
rename to modules/gpuoptflow/src/brox.cpp
index a5b741410..b5db69e2b 100644
--- a/modules/gpuoptflow/src/optical_flow.cpp
+++ b/modules/gpuoptflow/src/brox.cpp
@@ -45,11 +45,9 @@
 using namespace cv;
 using namespace cv::gpu;
 
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPULEGACY) || defined (CUDA_DISABLER)
 
 void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::interpolateFrames(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::createOpticalFlowNeedleMap(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
 
 #else
 
@@ -129,109 +127,4 @@ void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& f
     ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
 }
 
-void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv,
-                                float pos, GpuMat& newFrame, GpuMat& buf, Stream& s)
-{
-    CV_Assert(frame0.type() == CV_32FC1);
-    CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
-    CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());
-    CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());
-    CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());
-    CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());
-
-    newFrame.create(frame0.size(), frame0.type());
-
-    buf.create(6 * frame0.rows, frame0.cols, CV_32FC1);
-    buf.setTo(Scalar::all(0));
-
-    // occlusion masks
-    GpuMat occ0 = buf.rowRange(0 * frame0.rows, 1 * frame0.rows);
-    GpuMat occ1 = buf.rowRange(1 * frame0.rows, 2 * frame0.rows);
-
-    // interpolated forward flow
-    GpuMat fui = buf.rowRange(2 * frame0.rows, 3 * frame0.rows);
-    GpuMat fvi = buf.rowRange(3 * frame0.rows, 4 * frame0.rows);
-
-    // interpolated backward flow
-    GpuMat bui = buf.rowRange(4 * frame0.rows, 5 * frame0.rows);
-    GpuMat bvi = buf.rowRange(5 * frame0.rows, 6 * frame0.rows);
-
-    size_t step = frame0.step;
-
-    CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step);
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-    NppStStreamHandler h(stream);
-
-    NppStInterpolationState state;
-
-    state.size         = NcvSize32u(frame0.cols, frame0.rows);
-    state.nStep        = static_cast<Ncv32u>(step);
-    state.pSrcFrame0   = const_cast<Ncv32f*>(frame0.ptr<Ncv32f>());
-    state.pSrcFrame1   = const_cast<Ncv32f*>(frame1.ptr<Ncv32f>());
-    state.pFU          = const_cast<Ncv32f*>(fu.ptr<Ncv32f>());
-    state.pFV          = const_cast<Ncv32f*>(fv.ptr<Ncv32f>());
-    state.pBU          = const_cast<Ncv32f*>(bu.ptr<Ncv32f>());
-    state.pBV          = const_cast<Ncv32f*>(bv.ptr<Ncv32f>());
-    state.pos          = pos;
-    state.pNewFrame    = newFrame.ptr<Ncv32f>();
-    state.ppBuffers[0] = occ0.ptr<Ncv32f>();
-    state.ppBuffers[1] = occ1.ptr<Ncv32f>();
-    state.ppBuffers[2] = fui.ptr<Ncv32f>();
-    state.ppBuffers[3] = fvi.ptr<Ncv32f>();
-    state.ppBuffers[4] = bui.ptr<Ncv32f>();
-    state.ppBuffers[5] = bvi.ptr<Ncv32f>();
-
-    ncvSafeCall( nppiStInterpolateFrames(&state) );
-
-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
-}
-
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace optical_flow
-    {
-        void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg);
-        void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale);
-    }
-}}}
-
-void cv::gpu::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors)
-{
-    using namespace cv::gpu::cudev::optical_flow;
-
-    CV_Assert(u.type() == CV_32FC1);
-    CV_Assert(v.type() == u.type() && v.size() == u.size());
-
-    const int NEEDLE_MAP_SCALE = 16;
-
-    const int x_needles = u.cols / NEEDLE_MAP_SCALE;
-    const int y_needles = u.rows / NEEDLE_MAP_SCALE;
-
-    GpuMat u_avg(y_needles, x_needles, CV_32FC1);
-    GpuMat v_avg(y_needles, x_needles, CV_32FC1);
-
-    NeedleMapAverage_gpu(u, v, u_avg, v_avg);
-
-    const int NUM_VERTS_PER_ARROW = 6;
-
-    const int num_arrows = x_needles * y_needles * NUM_VERTS_PER_ARROW;
-
-    vertex.create(1, num_arrows, CV_32FC3);
-    colors.create(1, num_arrows, CV_32FC3);
-
-    colors.setTo(Scalar::all(1.0));
-
-    double uMax, vMax;
-    minMax(u_avg, 0, &uMax);
-    minMax(v_avg, 0, &vMax);
-
-    float max_flow = static_cast<float>(std::sqrt(uMax * uMax + vMax * vMax));
-
-    CreateOpticalFlowNeedleMap_gpu(u_avg, v_avg, vertex.ptr<float>(), colors.ptr<float>(), max_flow, 1.0f / u.cols, 1.0f / u.rows);
-
-    cvtColor(colors, colors, COLOR_HSV2RGB);
-}
-
 #endif /* HAVE_CUDA */
diff --git a/modules/gpuoptflow/src/cuda/bm.cu b/modules/gpuoptflow/src/cuda/bm.cu
new file mode 100644
index 000000000..9150d29a1
--- /dev/null
+++ b/modules/gpuoptflow/src/cuda/bm.cu
@@ -0,0 +1,169 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/limits.hpp"
+#include "opencv2/core/cuda/functional.hpp"
+#include "opencv2/core/cuda/reduce.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::cudev;
+
+namespace optflowbm
+{
+    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp);
+    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp);
+
+    __device__ int cmpBlocks(int X1, int Y1, int X2, int Y2, int2 blockSize) // sum of absolute differences: block at (X1, Y1) in tex_prev vs block at (X2, Y2) in tex_curr
+    {
+        int s = 0; // running sum of absolute differences
+
+        for (int y = 0; y < blockSize.y; ++y)
+        {
+            for (int x = 0; x < blockSize.x; ++x) // both textures use cudaAddressModeClamp, so fetches outside the image are clamped
+                s += ::abs(tex2D(tex_prev, X1 + x, Y1 + y) - tex2D(tex_curr, X2 + x, Y2 + y));
+        }
+
+        return s;
+    }
+
+    __global__ void calcOptFlowBM(PtrStepSzf velx, PtrStepf vely, const int2 blockSize, const int2 shiftSize, const bool usePrevious,
+                                  const int maxX, const int maxY, const int acceptLevel, const int escapeLevel,
+                                  const short2* ss, const int ssCount) // ss: ssCount candidate offsets, tried in order
+    {
+        const int j = blockIdx.x * blockDim.x + threadIdx.x; // column of the velx/vely element handled by this thread
+        const int i = blockIdx.y * blockDim.y + threadIdx.y; // row of the velx/vely element handled by this thread
+
+        if (i >= velx.rows || j >= velx.cols) // threads outside the flow field do nothing
+            return;
+
+        const int X1 = j * shiftSize.x; // origin of this thread's block in tex_prev
+        const int Y1 = i * shiftSize.y;
+
+        const int offX = usePrevious ? __float2int_rn(velx(i, j)) : 0; // start from the rounded existing estimate when usePrevious is set
+        const int offY = usePrevious ? __float2int_rn(vely(i, j)) : 0;
+
+        int X2 = X1 + offX;
+        int Y2 = Y1 + offY;
+
+        int dist = numeric_limits<int>::max(); // stays at max when the initial candidate is out of range
+
+        if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY) // candidate origin must lie within [0, maxX] x [0, maxY]
+            dist = cmpBlocks(X1, Y1, X2, Y2, blockSize);
+
+        int countMin = 1; // number of offsets accumulated in sumx/sumy
+        int sumx = offX;
+        int sumy = offY;
+
+        if (dist > acceptLevel) // initial candidate is not good enough
+        {
+            // do brute-force search
+            for (int k = 0; k < ssCount; ++k)
+            {
+                const short2 ssVal = ss[k];
+
+                const int dx = offX + ssVal.x;
+                const int dy = offY + ssVal.y;
+
+                X2 = X1 + dx;
+                Y2 = Y1 + dy;
+
+                if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY)
+                {
+                    const int tmpDist = cmpBlocks(X1, Y1, X2, Y2, blockSize);
+                    if (tmpDist < acceptLevel) // good enough: take this offset and stop searching
+                    {
+                        sumx = dx;
+                        sumy = dy;
+                        countMin = 1;
+                        break;
+                    }
+
+                    if (tmpDist < dist) // new best: restart the accumulation from this offset
+                    {
+                        dist = tmpDist;
+                        sumx = dx;
+                        sumy = dy;
+                        countMin = 1;
+                    }
+                    else if (tmpDist == dist) // tie with the current best: accumulate so the result is the mean of tied offsets
+                    {
+                        sumx += dx;
+                        sumy += dy;
+                        countMin++;
+                    }
+                }
+            }
+
+            if (dist > escapeLevel) // best distance is still above escapeLevel: fall back to the initial offset
+            {
+                sumx = offX;
+                sumy = offY;
+                countMin = 1;
+            }
+        }
+
+        velx(i, j) = static_cast<float>(sumx) / countMin; // mean of the accumulated offsets
+        vely(i, j) = static_cast<float>(sumy) / countMin;
+    }
+
+    void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
+              int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream) // host-side launcher for calcOptFlowBM
+    {
+        bindTexture(&tex_prev, prev); // cmpBlocks reads both frames through these textures
+        bindTexture(&tex_curr, curr);
+
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(velx.cols, block.x), divUp(vely.rows, block.y)); // enough 32x8 thread blocks to cover the flow field
+
+        calcOptFlowBM<<<grid, block, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious,
+                                                  maxX, maxY, acceptLevel,  escapeLevel, ss, ssCount);
+        cudaSafeCall( cudaGetLastError() ); // check the error state right after the launch
+
+        if (stream == 0) // default stream: wait for the kernel before returning
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+}
+
+#endif // !defined CUDA_DISABLER
diff --git a/modules/gpuoptflow/src/cuda/optflowbm.cu b/modules/gpuoptflow/src/cuda/bm_fast.cu
similarity index 72%
rename from modules/gpuoptflow/src/cuda/optflowbm.cu
rename to modules/gpuoptflow/src/cuda/bm_fast.cu
index 8f5b72efa..46f78a9f9 100644
--- a/modules/gpuoptflow/src/cuda/optflowbm.cu
+++ b/modules/gpuoptflow/src/cuda/bm_fast.cu
@@ -50,125 +50,6 @@
 using namespace cv::gpu;
 using namespace cv::gpu::cudev;
 
-namespace optflowbm
-{
-    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp);
-    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp);
-
-    __device__ int cmpBlocks(int X1, int Y1, int X2, int Y2, int2 blockSize)
-    {
-        int s = 0;
-
-        for (int y = 0; y < blockSize.y; ++y)
-        {
-            for (int x = 0; x < blockSize.x; ++x)
-                s += ::abs(tex2D(tex_prev, X1 + x, Y1 + y) - tex2D(tex_curr, X2 + x, Y2 + y));
-        }
-
-        return s;
-    }
-
-    __global__ void calcOptFlowBM(PtrStepSzf velx, PtrStepf vely, const int2 blockSize, const int2 shiftSize, const bool usePrevious,
-                                  const int maxX, const int maxY, const int acceptLevel, const int escapeLevel,
-                                  const short2* ss, const int ssCount)
-    {
-        const int j = blockIdx.x * blockDim.x + threadIdx.x;
-        const int i = blockIdx.y * blockDim.y + threadIdx.y;
-
-        if (i >= velx.rows || j >= velx.cols)
-            return;
-
-        const int X1 = j * shiftSize.x;
-        const int Y1 = i * shiftSize.y;
-
-        const int offX = usePrevious ? __float2int_rn(velx(i, j)) : 0;
-        const int offY = usePrevious ? __float2int_rn(vely(i, j)) : 0;
-
-        int X2 = X1 + offX;
-        int Y2 = Y1 + offY;
-
-        int dist = numeric_limits<int>::max();
-
-        if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY)
-            dist = cmpBlocks(X1, Y1, X2, Y2, blockSize);
-
-        int countMin = 1;
-        int sumx = offX;
-        int sumy = offY;
-
-        if (dist > acceptLevel)
-        {
-            // do brute-force search
-            for (int k = 0; k < ssCount; ++k)
-            {
-                const short2 ssVal = ss[k];
-
-                const int dx = offX + ssVal.x;
-                const int dy = offY + ssVal.y;
-
-                X2 = X1 + dx;
-                Y2 = Y1 + dy;
-
-                if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY)
-                {
-                    const int tmpDist = cmpBlocks(X1, Y1, X2, Y2, blockSize);
-                    if (tmpDist < acceptLevel)
-                    {
-                        sumx = dx;
-                        sumy = dy;
-                        countMin = 1;
-                        break;
-                    }
-
-                    if (tmpDist < dist)
-                    {
-                        dist = tmpDist;
-                        sumx = dx;
-                        sumy = dy;
-                        countMin = 1;
-                    }
-                    else if (tmpDist == dist)
-                    {
-                        sumx += dx;
-                        sumy += dy;
-                        countMin++;
-                    }
-                }
-            }
-
-            if (dist > escapeLevel)
-            {
-                sumx = offX;
-                sumy = offY;
-                countMin = 1;
-            }
-        }
-
-        velx(i, j) = static_cast<float>(sumx) / countMin;
-        vely(i, j) = static_cast<float>(sumy) / countMin;
-    }
-
-    void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
-              int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream)
-    {
-        bindTexture(&tex_prev, prev);
-        bindTexture(&tex_curr, curr);
-
-        const dim3 block(32, 8);
-        const dim3 grid(divUp(velx.cols, block.x), divUp(vely.rows, block.y));
-
-        calcOptFlowBM<<<grid, block, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious,
-                                                  maxX, maxY, acceptLevel,  escapeLevel, ss, ssCount);
-        cudaSafeCall( cudaGetLastError() );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-}
-
-/////////////////////////////////////////////////////////
-// Fast approximate version
-
 namespace optflowbm_fast
 {
     enum
diff --git a/modules/gpuoptflow/src/cuda/optical_flow_farneback.cu b/modules/gpuoptflow/src/cuda/farneback.cu
similarity index 100%
rename from modules/gpuoptflow/src/cuda/optical_flow_farneback.cu
rename to modules/gpuoptflow/src/cuda/farneback.cu
diff --git a/modules/gpuoptflow/src/cuda/optical_flow.cu b/modules/gpuoptflow/src/cuda/needle_map.cu
similarity index 100%
rename from modules/gpuoptflow/src/cuda/optical_flow.cu
rename to modules/gpuoptflow/src/cuda/needle_map.cu
diff --git a/modules/gpuoptflow/src/optical_flow_farneback.cpp b/modules/gpuoptflow/src/farneback.cpp
similarity index 88%
rename from modules/gpuoptflow/src/optical_flow_farneback.cpp
rename to modules/gpuoptflow/src/farneback.cpp
index efe2436e6..60a9cda67 100644
--- a/modules/gpuoptflow/src/optical_flow_farneback.cpp
+++ b/modules/gpuoptflow/src/farneback.cpp
@@ -247,8 +247,8 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
         pyramid1_[0] = frames_[1];
         for (int i = 1; i <= numLevelsCropped; ++i)
         {
-            pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
-            pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
+            gpu::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
+            gpu::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
         }
     }
 
@@ -291,22 +291,10 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
         {
             if (flags & OPTFLOW_USE_INITIAL_FLOW)
             {
-#if ENABLE_GPU_RESIZE
-                resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
-                resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
+                gpu::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
+                gpu::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
                 streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), scale);
                 streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), scale);
-#else
-                Mat tmp1, tmp2;
-                flowx0.download(tmp1);
-                resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
-                tmp2 *= scale;
-                curFlowX.upload(tmp2);
-                flowy0.download(tmp1);
-                resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
-                tmp2 *= scale;
-                curFlowY.upload(tmp2);
-#endif
             }
             else
             {
@@ -316,22 +304,10 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
         }
         else
         {
-#if ENABLE_GPU_RESIZE
-            resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
-            resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
+            gpu::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
+            gpu::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
             streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), 1./pyrScale);
             streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), 1./pyrScale);
-#else
-            Mat tmp1, tmp2;
-            prevFlowX.download(tmp1);
-            resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
-            tmp2 *= 1./pyrScale;
-            curFlowX.upload(tmp2);
-            prevFlowY.download(tmp1);
-            resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_LINEAR);
-            tmp2 *= 1./pyrScale;
-            curFlowY.upload(tmp2);
-#endif
         }
 
         GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
@@ -367,14 +343,7 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
             {
                 cudev::optflow_farneback::gaussianBlurGpu(
                         frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, S(streams[i]));
-#if ENABLE_GPU_RESIZE
-                resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR, streams[i]);
-#else
-                Mat tmp1, tmp2;
-                tmp[i].download(tmp1);
-                resize(tmp1, tmp2, Size(width, height), INTER_LINEAR);
-                I[i].upload(tmp2);
-#endif
+                gpu::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0, 0, INTER_LINEAR, streams[i]); // explicit fx, fy so INTER_LINEAR and the stream bind to the intended parameters
                 cudev::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i]));
             }
         }
diff --git a/modules/gpuoptflow/src/interpolate_frames.cpp b/modules/gpuoptflow/src/interpolate_frames.cpp
new file mode 100644
index 000000000..f6fe9c510
--- /dev/null
+++ b/modules/gpuoptflow/src/interpolate_frames.cpp
@@ -0,0 +1,113 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPULEGACY) || defined (CUDA_DISABLER)
+
+void cv::gpu::interpolateFrames(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } // stub for builds without HAVE_CUDA / HAVE_OPENCV_GPULEGACY or with CUDA_DISABLER
+
+#else
+
+void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv,
+                                float pos, GpuMat& newFrame, GpuMat& buf, Stream& s) // fills newFrame via nppiStInterpolateFrames; buf is (re)allocated as scratch storage
+{
+    CV_Assert(frame0.type() == CV_32FC1); // every input must be CV_32FC1 and match frame0 in size
+    CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
+    CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());
+    CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());
+    CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());
+    CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());
+
+    newFrame.create(frame0.size(), frame0.type());
+
+    buf.create(6 * frame0.rows, frame0.cols, CV_32FC1); // six frame-sized planes stacked vertically
+    buf.setTo(Scalar::all(0)); // all planes start zeroed
+
+    // occlusion masks
+    GpuMat occ0 = buf.rowRange(0 * frame0.rows, 1 * frame0.rows);
+    GpuMat occ1 = buf.rowRange(1 * frame0.rows, 2 * frame0.rows);
+
+    // interpolated forward flow
+    GpuMat fui = buf.rowRange(2 * frame0.rows, 3 * frame0.rows);
+    GpuMat fvi = buf.rowRange(3 * frame0.rows, 4 * frame0.rows);
+
+    // interpolated backward flow
+    GpuMat bui = buf.rowRange(4 * frame0.rows, 5 * frame0.rows);
+    GpuMat bvi = buf.rowRange(5 * frame0.rows, 6 * frame0.rows);
+
+    size_t step = frame0.step;
+
+    CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step); // state carries a single nStep, so every image must share one row pitch
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+    NppStStreamHandler h(stream); // NOTE(review): presumably directs the NPP_staging call below to this stream while h is alive - confirm
+
+    NppStInterpolationState state;
+
+    state.size         = NcvSize32u(frame0.cols, frame0.rows);
+    state.nStep        = static_cast<Ncv32u>(step);
+    state.pSrcFrame0   = const_cast<Ncv32f*>(frame0.ptr<Ncv32f>());
+    state.pSrcFrame1   = const_cast<Ncv32f*>(frame1.ptr<Ncv32f>());
+    state.pFU          = const_cast<Ncv32f*>(fu.ptr<Ncv32f>());
+    state.pFV          = const_cast<Ncv32f*>(fv.ptr<Ncv32f>());
+    state.pBU          = const_cast<Ncv32f*>(bu.ptr<Ncv32f>());
+    state.pBV          = const_cast<Ncv32f*>(bv.ptr<Ncv32f>());
+    state.pos          = pos;
+    state.pNewFrame    = newFrame.ptr<Ncv32f>();
+    state.ppBuffers[0] = occ0.ptr<Ncv32f>(); // ppBuffers[0..5] point at the six planes carved out of buf above
+    state.ppBuffers[1] = occ1.ptr<Ncv32f>();
+    state.ppBuffers[2] = fui.ptr<Ncv32f>();
+    state.ppBuffers[3] = fvi.ptr<Ncv32f>();
+    state.ppBuffers[4] = bui.ptr<Ncv32f>();
+    state.ppBuffers[5] = bvi.ptr<Ncv32f>();
+
+    ncvSafeCall( nppiStInterpolateFrames(&state) );
+
+    if (stream == 0) // default stream: wait for completion before returning
+        cudaSafeCall( cudaDeviceSynchronize() );
+}
+
+#endif /* HAVE_CUDA */
diff --git a/modules/gpuoptflow/src/needle_map.cpp b/modules/gpuoptflow/src/needle_map.cpp
new file mode 100644
index 000000000..1fdc16262
--- /dev/null
+++ b/modules/gpuoptflow/src/needle_map.cpp
@@ -0,0 +1,100 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+void cv::gpu::createOpticalFlowNeedleMap(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } // stub for builds without HAVE_CUDA or with CUDA_DISABLER
+
+#else
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace optical_flow // declarations only; NOTE(review): presumably defined in src/cuda/needle_map.cu - confirm
+    {
+        void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg);
+        void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale);
+    }
+}}}
+
+void cv::gpu::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors) // fills vertex and colors (each 1 x N, CV_32FC3) from the CV_32FC1 fields u and v
+{
+    using namespace cv::gpu::cudev::optical_flow;
+
+    CV_Assert(u.type() == CV_32FC1);
+    CV_Assert(v.type() == u.type() && v.size() == u.size());
+
+    const int NEEDLE_MAP_SCALE = 16; // one needle per 16 pixels along each axis
+
+    const int x_needles = u.cols / NEEDLE_MAP_SCALE;
+    const int y_needles = u.rows / NEEDLE_MAP_SCALE;
+
+    GpuMat u_avg(y_needles, x_needles, CV_32FC1); // one value per needle, written by NeedleMapAverage_gpu
+    GpuMat v_avg(y_needles, x_needles, CV_32FC1);
+
+    NeedleMapAverage_gpu(u, v, u_avg, v_avg);
+
+    const int NUM_VERTS_PER_ARROW = 6;
+
+    const int num_arrows = x_needles * y_needles * NUM_VERTS_PER_ARROW; // total vertex count (needles * vertices per needle), despite the name
+
+    vertex.create(1, num_arrows, CV_32FC3);
+    colors.create(1, num_arrows, CV_32FC3);
+
+    colors.setTo(Scalar::all(1.0));
+
+    double uMax, vMax;
+    gpu::minMax(u_avg, 0, &uMax);
+    gpu::minMax(v_avg, 0, &vMax);
+
+    float max_flow = static_cast<float>(std::sqrt(uMax * uMax + vMax * vMax)); // magnitude of (uMax, vMax)
+
+    CreateOpticalFlowNeedleMap_gpu(u_avg, v_avg, vertex.ptr<float>(), colors.ptr<float>(), max_flow, 1.0f / u.cols, 1.0f / u.rows);
+
+    gpu::cvtColor(colors, colors, COLOR_HSV2RGB); // in-place HSV -> RGB conversion on the GPU
+}
+
+#endif /* HAVE_CUDA */
diff --git a/modules/gpuoptflow/src/precomp.hpp b/modules/gpuoptflow/src/precomp.hpp
index 2bf47b228..27743b457 100644
--- a/modules/gpuoptflow/src/precomp.hpp
+++ b/modules/gpuoptflow/src/precomp.hpp
@@ -48,7 +48,7 @@
 #include "opencv2/gpuoptflow.hpp"
 #include "opencv2/gpuarithm.hpp"
 #include "opencv2/gpuwarping.hpp"
-
+#include "opencv2/gpuimgproc.hpp"
 #include "opencv2/video.hpp"
 
 #include "opencv2/core/gpu_private.hpp"
diff --git a/modules/gpuoptflow/src/pyrlk.cpp b/modules/gpuoptflow/src/pyrlk.cpp
index 00cc874b4..1992bf903 100644
--- a/modules/gpuoptflow/src/pyrlk.cpp
+++ b/modules/gpuoptflow/src/pyrlk.cpp
@@ -124,7 +124,7 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
 
     GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
     GpuMat temp2 = nextPts.reshape(1);
-    multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);
+    gpu::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);
 
     ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
     status.setTo(Scalar::all(1));
@@ -146,17 +146,17 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
     }
     else
     {
-        cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
+        gpu::cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
         buf_.convertTo(prevPyr_[0], CV_32F);
 
-        cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
+        gpu::cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
         buf_.convertTo(nextPyr_[0], CV_32F);
     }
 
     for (int level = 1; level <= maxLevel; ++level)
     {
-        pyrDown(prevPyr_[level - 1], prevPyr_[level]);
-        pyrDown(nextPyr_[level - 1], nextPyr_[level]);
+        gpu::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
+        gpu::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
     }
 
     pyrlk::loadConstants(make_int2(winSize.width, winSize.height), iters);
@@ -198,8 +198,8 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
 
     for (int level = 1; level <= maxLevel; ++level)
     {
-        pyrDown(prevPyr_[level - 1], prevPyr_[level]);
-        pyrDown(nextPyr_[level - 1], nextPyr_[level]);
+        gpu::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
+        gpu::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
     }
 
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
diff --git a/modules/gpuoptflow/test/test_optflow.cpp b/modules/gpuoptflow/test/test_optflow.cpp
index 893ab89dd..cf05ebc24 100644
--- a/modules/gpuoptflow/test/test_optflow.cpp
+++ b/modules/gpuoptflow/test/test_optflow.cpp
@@ -149,7 +149,7 @@ GPU_TEST_P(BroxOpticalFlow, OpticalFlowNan)
     EXPECT_TRUE(cv::checkRange(h_v));
 };
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, BroxOpticalFlow, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_OptFlow, BroxOpticalFlow, ALL_DEVICES);
 
 //////////////////////////////////////////////////////
 // PyrLKOpticalFlow
@@ -241,7 +241,7 @@ GPU_TEST_P(PyrLKOpticalFlow, Sparse)
     ASSERT_LE(bad_ratio, 0.01);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, PyrLKOpticalFlow, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_OptFlow, PyrLKOpticalFlow, testing::Combine(
     ALL_DEVICES,
     testing::Values(UseGray(true), UseGray(false))));
 
@@ -316,7 +316,7 @@ GPU_TEST_P(FarnebackOpticalFlow, Accuracy)
     EXPECT_MAT_SIMILAR(flowxy[1], d_flowy, 0.1);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, FarnebackOpticalFlow, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_OptFlow, FarnebackOpticalFlow, testing::Combine(
     ALL_DEVICES,
     testing::Values(PyrScale(0.3), PyrScale(0.5), PyrScale(0.8)),
     testing::Values(PolyN(5), PolyN(7)),
@@ -366,7 +366,7 @@ GPU_TEST_P(OpticalFlowDual_TVL1, Accuracy)
     EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, OpticalFlowDual_TVL1, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_OptFlow, OpticalFlowDual_TVL1, testing::Combine(
     ALL_DEVICES,
     WHOLE_SUBMAT));
 
@@ -425,7 +425,7 @@ GPU_TEST_P(OpticalFlowBM, Accuracy)
     EXPECT_MAT_NEAR(vely, d_vely, 0);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, OpticalFlowBM, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_OptFlow, OpticalFlowBM, ALL_DEVICES);
 
 //////////////////////////////////////////////////////
 // FastOpticalFlowBM
@@ -543,6 +543,6 @@ GPU_TEST_P(FastOpticalFlowBM, Accuracy)
     EXPECT_LE(err, MAX_RMSE);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_Video, FastOpticalFlowBM, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_OptFlow, FastOpticalFlowBM, ALL_DEVICES);
 
 #endif // HAVE_CUDA
diff --git a/modules/gpuoptflow/test/test_precomp.hpp b/modules/gpuoptflow/test/test_precomp.hpp
index 4f993dd8b..32a7443e8 100644
--- a/modules/gpuoptflow/test/test_precomp.hpp
+++ b/modules/gpuoptflow/test/test_precomp.hpp
@@ -57,8 +57,6 @@
 #include "opencv2/ts/gpu_test.hpp"
 
 #include "opencv2/gpuoptflow.hpp"
-#include "opencv2/gpuimgproc.hpp"
 #include "opencv2/video.hpp"
-#include "opencv2/legacy.hpp"
 
 #endif

From 5d7bbdc837be87671b19540a4320000aa40af355 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 12:36:07 +0400
Subject: [PATCH 38/49] gpu module fixes

---
 modules/gpu/CMakeLists.txt            |  5 +--
 modules/gpu/doc/gpu.rst               |  1 +
 modules/gpu/include/opencv2/gpu.hpp   | 61 +++++++++++++++++++++++----
 modules/gpu/perf4au/CMakeLists.txt    |  2 +-
 modules/gpu/perf4au/main.cpp          |  4 +-
 modules/gpu/src/cascadeclassifier.cpp |  5 ++-
 modules/gpu/src/precomp.hpp           |  6 +--
 7 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 1d108b00e..3c39dc152 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -4,10 +4,9 @@ endif()
 
 set(the_description "GPU-accelerated Computer Vision")
 
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
-ocv_define_module(gpu opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
-                      opencv_gpufeatures2d opencv_gpuoptflow opencv_gpubgsegm opencv_gpustereo)
+ocv_define_module(gpu opencv_calib3d opencv_objdetect opencv_gpuarithm opencv_gpuwarping OPTIONAL opencv_gpulegacy)
 
 if(HAVE_CUDA)
   add_subdirectory(perf4au)
diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst
index bc3b9bdb1..2a0358e01 100644
--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@@ -9,3 +9,4 @@ gpu. GPU-accelerated Computer Vision
     initalization_and_information
     data_structures
     object_detection
+    calib3d
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 166a360a1..5dc9b9549 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -43,19 +43,56 @@
 #ifndef __OPENCV_GPU_HPP__
 #define __OPENCV_GPU_HPP__
 
+#ifndef __cplusplus
+#  error gpu.hpp header must be compiled as C++
+#endif
+
 #include "opencv2/core/gpumat.hpp"
-#include "opencv2/gpuarithm.hpp"
-#include "opencv2/gpufilters.hpp"
-#include "opencv2/gpuwarping.hpp"
-#include "opencv2/gpuimgproc.hpp"
-#include "opencv2/gpufeatures2d.hpp"
-#include "opencv2/gpuoptflow.hpp"
-#include "opencv2/gpubgsegm.hpp"
-#include "opencv2/gpustereo.hpp"
+
+#if !defined(__OPENCV_BUILD) && !defined(OPENCV_GPU_SKIP_INCLUDE)
+    #include "opencv2/opencv_modules.hpp"
+
+    #ifdef HAVE_OPENCV_GPUARITHM
+        #include "opencv2/gpuarithm.hpp"
+    #endif
+
+    #ifdef HAVE_OPENCV_GPUWARPING
+        #include "opencv2/gpuwarping.hpp"
+    #endif
+
+    #ifdef HAVE_OPENCV_GPUFILTERS
+        #include "opencv2/gpufilters.hpp"
+    #endif
+
+    #ifdef HAVE_OPENCV_GPUIMGPROC
+        #include "opencv2/gpuimgproc.hpp"
+    #endif
+
+    #ifdef HAVE_OPENCV_GPUFEATURES2D
+        #include "opencv2/gpufeatures2d.hpp"
+    #endif
+
+    #ifdef HAVE_OPENCV_GPUOPTFLOW
+        #include "opencv2/gpuoptflow.hpp"
+    #endif
+
+    #ifdef HAVE_OPENCV_GPUBGSEGM
+        #include "opencv2/gpubgsegm.hpp"
+    #endif
+
+    #ifdef HAVE_OPENCV_GPUSTEREO
+        #include "opencv2/gpustereo.hpp"
+    #endif
+
+    #ifdef HAVE_OPENCV_GPUCODEC
+        #include "opencv2/gpucodec.hpp"
+    #endif
+#endif
 
 namespace cv { namespace gpu {
 
 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+
 struct CV_EXPORTS HOGConfidence
 {
    double scale;
@@ -146,6 +183,8 @@ protected:
     std::vector<GpuMat> image_scales;
 };
 
+//////////////////////////// CascadeClassifier ////////////////////////////
+
 // The cascade classifier class for object detection: supports old haar and new lbp xlm formats and nvbin for haar cascades olny.
 class CV_EXPORTS CascadeClassifier_GPU
 {
@@ -175,6 +214,8 @@ private:
     friend class CascadeClassifier_GPU_LBP;
 };
 
+//////////////////////////// Labeling ////////////////////////////
+
 //!performs labeling via graph cuts of a 2D regular 4-connected graph.
 CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
                          GpuMat& buf, Stream& stream = Stream::Null());
@@ -191,6 +232,8 @@ CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Sc
 //! performs connected componnents labeling.
 CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
 
+//////////////////////////// Calib3d ////////////////////////////
+
 CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
                                 GpuMat& dst, Stream& stream = Stream::Null());
 
@@ -203,6 +246,8 @@ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& c
                                int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
                                std::vector<int>* inliers=NULL);
 
+//////////////////////////// VStab ////////////////////////////
+
 //! removes points (CV_32FC2, single row matrix) with zero mask value
 CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
 
diff --git a/modules/gpu/perf4au/CMakeLists.txt b/modules/gpu/perf4au/CMakeLists.txt
index 376e7b270..cb395069d 100644
--- a/modules/gpu/perf4au/CMakeLists.txt
+++ b/modules/gpu/perf4au/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(PERF4AU_REQUIRED_DEPS opencv_core opencv_imgproc opencv_highgui opencv_video opencv_legacy opencv_gpu opencv_ts)
+set(PERF4AU_REQUIRED_DEPS opencv_core opencv_imgproc opencv_highgui opencv_video opencv_legacy opencv_ml opencv_ts opencv_gpufilters opencv_gpuimgproc opencv_gpuoptflow)
 
 ocv_check_dependencies(${PERF4AU_REQUIRED_DEPS})
 
diff --git a/modules/gpu/perf4au/main.cpp b/modules/gpu/perf4au/main.cpp
index 30e63d541..ce40d61cb 100644
--- a/modules/gpu/perf4au/main.cpp
+++ b/modules/gpu/perf4au/main.cpp
@@ -43,7 +43,9 @@
 #include "opencv2/ts.hpp"
 #include "opencv2/ts/gpu_perf.hpp"
 
-#include "opencv2/gpu.hpp"
+#include "opencv2/gpuimgproc.hpp"
+#include "opencv2/gpuoptflow.hpp"
+
 #include "opencv2/highgui.hpp"
 #include "opencv2/video.hpp"
 #include "opencv2/legacy.hpp"
diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp
index d9f34a54a..454c10591 100644
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -75,7 +75,7 @@ public:
 
 #ifndef HAVE_OPENCV_GPULEGACY
 
-struct cv::gpu::CascadeClassifier_GPU::HaarCascade
+struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
 {
 public:
     HaarCascade()
@@ -508,6 +508,8 @@ private:
             resuzeBuffer.create(frame, CV_8UC1);
 
             integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
+
+#ifdef HAVE_OPENCV_GPULEGACY
             NcvSize32u roiSize;
             roiSize.width = frame.width;
             roiSize.height = frame.height;
@@ -518,6 +520,7 @@ private:
             Ncv32u bufSize;
             ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
             integralBuffer.create(1, bufSize, CV_8UC1);
+#endif
 
             candidates.create(1 , frame.width >> 1, CV_32SC4);
         }
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index 0b468a3e9..ceb560f93 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -43,10 +43,9 @@
 #ifndef __OPENCV_PRECOMP_H__
 #define __OPENCV_PRECOMP_H__
 
-#include <vector>
-#include <iostream>
-
 #include "opencv2/gpu.hpp"
+#include "opencv2/gpuarithm.hpp"
+#include "opencv2/gpuwarping.hpp"
 #include "opencv2/calib3d.hpp"
 #include "opencv2/objdetect.hpp"
 
@@ -55,7 +54,6 @@
 #include "opencv2/opencv_modules.hpp"
 
 #ifdef HAVE_OPENCV_GPULEGACY
-#  include "opencv2/gpulegacy.hpp"
 #  include "opencv2/gpulegacy/private.hpp"
 #endif
 

From 12f16b7a262862d56115ff38ca85bd438499f523 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:10:55 +0400
Subject: [PATCH 39/49] nonfree module fixes

---
 modules/nonfree/CMakeLists.txt                  |  3 ++-
 modules/nonfree/include/opencv2/nonfree/gpu.hpp | 12 ++----------
 modules/nonfree/perf/perf_gpu.cpp               | 10 +++++++---
 modules/nonfree/perf/perf_precomp.hpp           |  5 ++---
 modules/nonfree/src/cuda/surf.cu                |  5 ++---
 modules/nonfree/src/cuda/vibe.cu                |  6 ------
 modules/nonfree/src/precomp.hpp                 |  8 +++++---
 modules/nonfree/src/surf_gpu.cpp                | 10 +++-------
 modules/nonfree/src/vibe_gpu.cpp                |  6 +-----
 modules/nonfree/test/test_gpu.cpp               |  8 ++++++--
 modules/nonfree/test/test_precomp.hpp           |  2 +-
 11 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/modules/nonfree/CMakeLists.txt b/modules/nonfree/CMakeLists.txt
index 55f6b410c..fa73931aa 100644
--- a/modules/nonfree/CMakeLists.txt
+++ b/modules/nonfree/CMakeLists.txt
@@ -3,4 +3,5 @@ if(BUILD_ANDROID_PACKAGE)
 endif()
 
 set(the_description "Functionality with possible limitations on the use")
-ocv_define_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_gpu opencv_ocl)
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
+ocv_define_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_gpuarithm opencv_ocl)
diff --git a/modules/nonfree/include/opencv2/nonfree/gpu.hpp b/modules/nonfree/include/opencv2/nonfree/gpu.hpp
index aa0c9cba0..1be62b294 100644
--- a/modules/nonfree/include/opencv2/nonfree/gpu.hpp
+++ b/modules/nonfree/include/opencv2/nonfree/gpu.hpp
@@ -43,11 +43,7 @@
 #ifndef __OPENCV_NONFREE_GPU_HPP__
 #define __OPENCV_NONFREE_GPU_HPP__
 
-#include "opencv2/opencv_modules.hpp"
-
-#if defined(HAVE_OPENCV_GPU)
-
-#include "opencv2/gpu.hpp"
+#include "opencv2/core/gpumat.hpp"
 
 namespace cv { namespace gpu {
 
@@ -160,10 +156,6 @@ private:
     GpuMat samples_;
 };
 
-} // namespace gpu
-
-} // namespace cv
-
-#endif // defined(HAVE_OPENCV_GPU)
+}} // namespace cv { namespace gpu {
 
 #endif // __OPENCV_NONFREE_GPU_HPP__
diff --git a/modules/nonfree/perf/perf_gpu.cpp b/modules/nonfree/perf/perf_gpu.cpp
index eeaf5a7f6..3caa5a262 100644
--- a/modules/nonfree/perf/perf_gpu.cpp
+++ b/modules/nonfree/perf/perf_gpu.cpp
@@ -42,7 +42,7 @@
 
 #include "perf_precomp.hpp"
 
-#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
+#ifdef HAVE_CUDA
 
 #include "opencv2/ts/gpu_perf.hpp"
 
@@ -65,6 +65,8 @@ using namespace perf;
 //////////////////////////////////////////////////////////////////////
 // SURF
 
+#ifdef HAVE_OPENCV_GPUARITHM
+
 DEF_PARAM_TEST_1(Image, string);
 
 PERF_TEST_P(Image, GPU_SURF,
@@ -108,6 +110,8 @@ PERF_TEST_P(Image, GPU_SURF,
     }
 }
 
+#endif // HAVE_OPENCV_GPUARITHM
+
 //////////////////////////////////////////////////////
 // VIBE
 
@@ -177,6 +181,6 @@ PERF_TEST_P(Video_Cn, GPU_VIBE,
     }
 }
 
-#endif
+#endif // BUILD_WITH_VIDEO_INPUT_SUPPORT
 
-#endif
+#endif // HAVE_CUDA
diff --git a/modules/nonfree/perf/perf_precomp.hpp b/modules/nonfree/perf/perf_precomp.hpp
index 13e161656..5e3f3c3cb 100644
--- a/modules/nonfree/perf/perf_precomp.hpp
+++ b/modules/nonfree/perf/perf_precomp.hpp
@@ -19,9 +19,8 @@
 #  include "opencv2/nonfree/ocl.hpp"
 #endif
 
-#ifdef HAVE_OPENCV_GPU
-    #include "opencv2/nonfree/gpu.hpp"
-    #include "opencv2/ts/gpu_perf.hpp"
+#ifdef HAVE_CUDA
+#  include "opencv2/nonfree/gpu.hpp"
 #endif
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
diff --git a/modules/nonfree/src/cuda/surf.cu b/modules/nonfree/src/cuda/surf.cu
index 46e8d2e5f..cdd54e4ca 100644
--- a/modules/nonfree/src/cuda/surf.cu
+++ b/modules/nonfree/src/cuda/surf.cu
@@ -42,7 +42,7 @@
 
 #include "opencv2/opencv_modules.hpp"
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUARITHM
 
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/limits.hpp"
@@ -957,5 +957,4 @@ namespace cv { namespace gpu { namespace cudev
     } // namespace surf
 }}} // namespace cv { namespace gpu { namespace cudev
 
-
-#endif /* CUDA_DISABLER */
+#endif // HAVE_OPENCV_GPUARITHM
diff --git a/modules/nonfree/src/cuda/vibe.cu b/modules/nonfree/src/cuda/vibe.cu
index b896e6a0f..951b96486 100644
--- a/modules/nonfree/src/cuda/vibe.cu
+++ b/modules/nonfree/src/cuda/vibe.cu
@@ -40,10 +40,6 @@
 //
 //M*/
 
-#include "opencv2/opencv_modules.hpp"
-
-#ifdef HAVE_OPENCV_GPU
-
 #include "opencv2/core/cuda/common.hpp"
 
 namespace cv { namespace gpu { namespace cudev
@@ -267,5 +263,3 @@ namespace cv { namespace gpu { namespace cudev
         }
     }
 }}}
-
-#endif
diff --git a/modules/nonfree/src/precomp.hpp b/modules/nonfree/src/precomp.hpp
index cc0098c98..f89e1566e 100644
--- a/modules/nonfree/src/precomp.hpp
+++ b/modules/nonfree/src/precomp.hpp
@@ -49,11 +49,13 @@
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/private.hpp"
 
+#include "opencv2/nonfree/gpu.hpp"
+#include "opencv2/core/gpu_private.hpp"
+
 #include "opencv2/opencv_modules.hpp"
 
-#ifdef HAVE_OPENCV_GPU
-#  include "opencv2/nonfree/gpu.hpp"
-#  include "opencv2/core/gpu_private.hpp"
+#ifdef HAVE_OPENCV_GPUARITHM
+#  include "opencv2/gpuarithm.hpp"
 #endif
 
 #ifdef HAVE_OPENCV_OCL
diff --git a/modules/nonfree/src/surf_gpu.cpp b/modules/nonfree/src/surf_gpu.cpp
index 328b3cbef..ace9bb53a 100644
--- a/modules/nonfree/src/surf_gpu.cpp
+++ b/modules/nonfree/src/surf_gpu.cpp
@@ -42,12 +42,10 @@
 
 #include "precomp.hpp"
 
-#if defined(HAVE_OPENCV_GPU)
-
 using namespace cv;
 using namespace cv::gpu;
 
-#if !defined (HAVE_CUDA)
+#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPUARITHM)
 
 cv::gpu::SURF_GPU::SURF_GPU() { throw_no_cuda(); }
 cv::gpu::SURF_GPU::SURF_GPU(double, int, int, bool, float, bool) { throw_no_cuda(); }
@@ -144,13 +142,13 @@ namespace
 
             bindImgTex(img);
 
-            integralBuffered(img, surf_.sum, surf_.intBuffer);
+            gpu::integralBuffered(img, surf_.sum, surf_.intBuffer);
             sumOffset = bindSumTex(surf_.sum);
 
             if (use_mask)
             {
                 min(mask, 1.0, surf_.mask1);
-                integralBuffered(surf_.mask1, surf_.maskSum, surf_.intBuffer);
+                gpu::integralBuffered(surf_.mask1, surf_.maskSum, surf_.intBuffer);
                 maskOffset = bindMaskSumTex(surf_.maskSum);
             }
         }
@@ -421,5 +419,3 @@ void cv::gpu::SURF_GPU::releaseMemory()
 }
 
 #endif // !defined (HAVE_CUDA)
-
-#endif // defined(HAVE_OPENCV_GPU)
diff --git a/modules/nonfree/src/vibe_gpu.cpp b/modules/nonfree/src/vibe_gpu.cpp
index 6bf993838..2d291fb11 100644
--- a/modules/nonfree/src/vibe_gpu.cpp
+++ b/modules/nonfree/src/vibe_gpu.cpp
@@ -42,9 +42,7 @@
 
 #include "precomp.hpp"
 
-#if defined(HAVE_OPENCV_GPU)
-
-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+#ifndef HAVE_CUDA
 
 cv::gpu::VIBE_GPU::VIBE_GPU(unsigned long) { throw_no_cuda(); }
 void cv::gpu::VIBE_GPU::initialize(const GpuMat&, Stream&) { throw_no_cuda(); }
@@ -137,5 +135,3 @@ void cv::gpu::VIBE_GPU::release()
 }
 
 #endif
-
-#endif // defined(HAVE_OPENCV_GPU)
diff --git a/modules/nonfree/test/test_gpu.cpp b/modules/nonfree/test/test_gpu.cpp
index 30aec352c..42b2d0605 100644
--- a/modules/nonfree/test/test_gpu.cpp
+++ b/modules/nonfree/test/test_gpu.cpp
@@ -42,13 +42,15 @@
 
 #include "test_precomp.hpp"
 
-#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
+#ifdef HAVE_CUDA
 
 using namespace cvtest;
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // SURF
 
+#ifdef HAVE_OPENCV_GPUARITHM
+
 namespace
 {
     IMPLEMENT_PARAM_CLASS(SURF_HessianThreshold, double)
@@ -191,6 +193,8 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
     testing::Values(SURF_Extended(false), SURF_Extended(true)),
     testing::Values(SURF_Upright(false), SURF_Upright(true))));
 
+#endif // HAVE_OPENCV_GPUARITHM
+
 //////////////////////////////////////////////////////
 // VIBE
 
@@ -229,4 +233,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine(
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)),
     WHOLE_SUBMAT));
 
-#endif
+#endif // HAVE_CUDA
diff --git a/modules/nonfree/test/test_precomp.hpp b/modules/nonfree/test/test_precomp.hpp
index cacf37267..563dad43b 100644
--- a/modules/nonfree/test/test_precomp.hpp
+++ b/modules/nonfree/test/test_precomp.hpp
@@ -24,7 +24,7 @@
 #  include "opencv2/nonfree/ocl.hpp"
 #endif
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_CUDA
 #  include "opencv2/nonfree/gpu.hpp"
 #endif
 

From f375c86dfbf3b82fad0d040fa1842db56d954670 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:33:42 +0400
Subject: [PATCH 40/49] photo module fixes

---
 modules/photo/CMakeLists.txt              |  2 +-
 modules/photo/perf/perf_gpu.cpp           | 12 +++++++---
 modules/photo/src/cuda/nlm.cu             |  5 ----
 modules/photo/src/denoising_gpu.cpp       | 17 +++++++++-----
 modules/photo/test/test_denoising_gpu.cpp | 28 ++++++++++++-----------
 5 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/modules/photo/CMakeLists.txt b/modules/photo/CMakeLists.txt
index b3e0e18dc..5b47d3a1d 100644
--- a/modules/photo/CMakeLists.txt
+++ b/modules/photo/CMakeLists.txt
@@ -4,4 +4,4 @@ if(HAVE_CUDA)
   ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
 endif()
 
-ocv_define_module(photo opencv_imgproc OPTIONAL opencv_gpuimgproc)
+ocv_define_module(photo opencv_imgproc OPTIONAL opencv_gpuarithm opencv_gpuimgproc)
diff --git a/modules/photo/perf/perf_gpu.cpp b/modules/photo/perf/perf_gpu.cpp
index f33dd8199..ec62f7a0c 100644
--- a/modules/photo/perf/perf_gpu.cpp
+++ b/modules/photo/perf/perf_gpu.cpp
@@ -45,6 +45,10 @@
 #include "opencv2/photo/gpu.hpp"
 #include "opencv2/ts/gpu_perf.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#if defined (HAVE_CUDA) && defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUIMGPROC)
+
 using namespace std;
 using namespace testing;
 using namespace perf;
@@ -56,7 +60,7 @@ using namespace perf;
 
 DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
 
-PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
+PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, GPU_NonLocalMeans,
             Combine(GPU_DENOISING_IMAGE_SIZES,
                     Values<MatDepth>(CV_8U),
                     GPU_CHANNELS_1_3,
@@ -100,7 +104,7 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
 
 DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
 
-PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
+PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, GPU_FastNonLocalMeans,
             Combine(GPU_DENOISING_IMAGE_SIZES,
                     Values<MatDepth>(CV_8U),
                     GPU_CHANNELS_1_3,
@@ -146,7 +150,7 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
 
 DEF_PARAM_TEST(Sz_Depth_WinSz_BlockSz, cv::Size, MatDepth, int, int);
 
-PERF_TEST_P(Sz_Depth_WinSz_BlockSz, Denoising_FastNonLocalMeansColored,
+PERF_TEST_P(Sz_Depth_WinSz_BlockSz, GPU_FastNonLocalMeansColored,
             Combine(GPU_DENOISING_IMAGE_SIZES,
                     Values<MatDepth>(CV_8U),
                     Values(21),
@@ -185,3 +189,5 @@ PERF_TEST_P(Sz_Depth_WinSz_BlockSz, Denoising_FastNonLocalMeansColored,
         CPU_SANITY_CHECK(dst);
     }
 }
+
+#endif
diff --git a/modules/photo/src/cuda/nlm.cu b/modules/photo/src/cuda/nlm.cu
index 03044697d..44ed4dc6b 100644
--- a/modules/photo/src/cuda/nlm.cu
+++ b/modules/photo/src/cuda/nlm.cu
@@ -40,8 +40,6 @@
 //
 //M*/
 
-#if !defined CUDA_DISABLER
-
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/vec_traits.hpp"
 #include "opencv2/core/cuda/vec_math.hpp"
@@ -564,6 +562,3 @@ namespace cv { namespace gpu { namespace cudev
         }
     }
 }}}
-
-
-#endif /* CUDA_DISABLER */
diff --git a/modules/photo/src/denoising_gpu.cpp b/modules/photo/src/denoising_gpu.cpp
index 65d6f8121..954b1611b 100644
--- a/modules/photo/src/denoising_gpu.cpp
+++ b/modules/photo/src/denoising_gpu.cpp
@@ -44,13 +44,21 @@
 
 #include "opencv2/photo/gpu.hpp"
 #include "opencv2/core/gpu_private.hpp"
-#include "opencv2/gpuarithm.hpp"
-#include "opencv2/gpuimgproc.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUARITHM
+#  include "opencv2/gpuarithm.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUIMGPROC
+#  include "opencv2/gpuimgproc.hpp"
+#endif
 
 using namespace cv;
 using namespace cv::gpu;
 
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+#if !defined (HAVE_CUDA) || !defined(HAVE_OPENCV_GPUARITHM) || !defined(HAVE_OPENCV_GPUIMGPROC)
 
 void cv::gpu::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); }
 void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); }
@@ -147,7 +155,4 @@ void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat&
     cv::gpu::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s);
 }
 
-
 #endif
-
-
diff --git a/modules/photo/test/test_denoising_gpu.cpp b/modules/photo/test/test_denoising_gpu.cpp
index f46a49fc1..caf3b23f2 100644
--- a/modules/photo/test/test_denoising_gpu.cpp
+++ b/modules/photo/test/test_denoising_gpu.cpp
@@ -45,18 +45,20 @@
 #include "opencv2/photo/gpu.hpp"
 #include "opencv2/ts/gpu_test.hpp"
 
-#ifdef HAVE_CUDA
+#include "opencv2/opencv_modules.hpp"
+
+#if defined (HAVE_CUDA) && defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUIMGPROC)
 
 using namespace cvtest;
 
 ////////////////////////////////////////////////////////
 // Brute Force Non local means
 
-TEST(BruteForceNonLocalMeans, Regression)
+TEST(GPU_BruteForceNonLocalMeans, Regression)
 {
     using cv::gpu::GpuMat;
 
-    cv::Mat bgr  = readImage("denoising/lena_noised_gaussian_sigma=20_multi_0.png", cv::IMREAD_COLOR);
+    cv::Mat bgr  = readImage("../gpu/denoising/lena_noised_gaussian_sigma=20_multi_0.png", cv::IMREAD_COLOR);
     ASSERT_FALSE(bgr.empty());
 
     cv::Mat gray;
@@ -67,12 +69,12 @@ TEST(BruteForceNonLocalMeans, Regression)
     cv::gpu::nonLocalMeans(GpuMat(gray), dgray, 20);
 
 #if 0
-    dumpImage("denoising/nlm_denoised_lena_bgr.png", cv::Mat(dbgr));
-    dumpImage("denoising/nlm_denoised_lena_gray.png", cv::Mat(dgray));
+    dumpImage("../gpu/denoising/nlm_denoised_lena_bgr.png", cv::Mat(dbgr));
+    dumpImage("../gpu/denoising/nlm_denoised_lena_gray.png", cv::Mat(dgray));
 #endif
 
-    cv::Mat bgr_gold  = readImage("denoising/nlm_denoised_lena_bgr.png", cv::IMREAD_COLOR);
-    cv::Mat gray_gold  = readImage("denoising/nlm_denoised_lena_gray.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat bgr_gold  = readImage("../gpu/denoising/nlm_denoised_lena_bgr.png", cv::IMREAD_COLOR);
+    cv::Mat gray_gold  = readImage("../gpu/denoising/nlm_denoised_lena_gray.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(bgr_gold.empty() || gray_gold.empty());
 
     EXPECT_MAT_NEAR(bgr_gold, dbgr, 1e-4);
@@ -82,11 +84,11 @@ TEST(BruteForceNonLocalMeans, Regression)
 ////////////////////////////////////////////////////////
 // Fast Force Non local means
 
-TEST(FastNonLocalMeans, Regression)
+TEST(GPU_FastNonLocalMeans, Regression)
 {
     using cv::gpu::GpuMat;
 
-    cv::Mat bgr  = readImage("denoising/lena_noised_gaussian_sigma=20_multi_0.png", cv::IMREAD_COLOR);
+    cv::Mat bgr  = readImage("../gpu/denoising/lena_noised_gaussian_sigma=20_multi_0.png", cv::IMREAD_COLOR);
     ASSERT_FALSE(bgr.empty());
 
     cv::Mat gray;
@@ -99,12 +101,12 @@ TEST(FastNonLocalMeans, Regression)
     fnlmd.labMethod(GpuMat(bgr),  dbgr, 20, 10);
 
 #if 0
-    dumpImage("denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr));
-    dumpImage("denoising/fnlm_denoised_lena_gray.png", cv::Mat(dgray));
+    dumpImage("../gpu/denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr));
+    dumpImage("../gpu/denoising/fnlm_denoised_lena_gray.png", cv::Mat(dgray));
 #endif
 
-    cv::Mat bgr_gold  = readImage("denoising/fnlm_denoised_lena_bgr.png", cv::IMREAD_COLOR);
-    cv::Mat gray_gold  = readImage("denoising/fnlm_denoised_lena_gray.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat bgr_gold  = readImage("../gpu/denoising/fnlm_denoised_lena_bgr.png", cv::IMREAD_COLOR);
+    cv::Mat gray_gold  = readImage("../gpu/denoising/fnlm_denoised_lena_gray.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(bgr_gold.empty() || gray_gold.empty());
 
     EXPECT_MAT_NEAR(bgr_gold, dbgr, 1);

From 033dd77504198dfe79bd999085cc135b0a88d489 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:11:14 +0400
Subject: [PATCH 41/49] superres module fixes

---
 modules/superres/CMakeLists.txt              |  4 +++-
 modules/superres/src/btv_l1_gpu.cpp          |  7 +++++-
 modules/superres/src/cuda/btv_l1_gpu.cu      |  6 +++++
 modules/superres/src/input_array_utility.cpp |  2 +-
 modules/superres/src/optical_flow.cpp        |  6 ++---
 modules/superres/src/precomp.hpp             | 23 +++++++++++++++++---
 modules/superres/test/test_superres.cpp      |  2 +-
 7 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt
index 378a2a942..065d8ced1 100644
--- a/modules/superres/CMakeLists.txt
+++ b/modules/superres/CMakeLists.txt
@@ -4,4 +4,6 @@ endif()
 
 set(the_description "Super Resolution")
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
-ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_highgui opencv_gpu opencv_gpuarithm opencv_gpufilters opencv_gpucodec)
+ocv_define_module(superres opencv_imgproc opencv_video
+                  OPTIONAL opencv_highgui
+                  opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc opencv_gpuoptflow opencv_gpucodec)
diff --git a/modules/superres/src/btv_l1_gpu.cpp b/modules/superres/src/btv_l1_gpu.cpp
index a3f11322e..6813187c4 100644
--- a/modules/superres/src/btv_l1_gpu.cpp
+++ b/modules/superres/src/btv_l1_gpu.cpp
@@ -50,7 +50,7 @@ using namespace cv::gpu;
 using namespace cv::superres;
 using namespace cv::superres::detail;
 
-#if !defined(HAVE_CUDA) || !defined(HAVE_OPENCV_GPU)
+#if !defined(HAVE_CUDA) || !defined(HAVE_OPENCV_GPUARITHM) || !defined(HAVE_OPENCV_GPUWARPING) || !defined(HAVE_OPENCV_GPUFILTERS)
 
 Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1_GPU()
 {
@@ -266,7 +266,12 @@ namespace
         btvKernelSize_ = 7;
         blurKernelSize_ = 5;
         blurSigma_ = 0.0;
+
+#ifdef HAVE_OPENCV_GPUOPTFLOW
         opticalFlow_ = createOptFlow_Farneback_GPU();
+#else
+        opticalFlow_ = createOptFlow_Farneback();
+#endif
 
         curBlurKernelSize_ = -1;
         curBlurSigma_ = -1.0;
diff --git a/modules/superres/src/cuda/btv_l1_gpu.cu b/modules/superres/src/cuda/btv_l1_gpu.cu
index 58e46faac..22b3e0abf 100644
--- a/modules/superres/src/cuda/btv_l1_gpu.cu
+++ b/modules/superres/src/cuda/btv_l1_gpu.cu
@@ -40,6 +40,10 @@
 //
 //M*/
 
+#include "opencv2/opencv_modules.hpp"
+
+#if defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUWARPING) && defined(HAVE_OPENCV_GPUFILTERS)
+
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/transform.hpp"
 #include "opencv2/core/cuda/vec_traits.hpp"
@@ -232,3 +236,5 @@ namespace btv_l1_cudev
     template void calcBtvRegularization<3>(PtrStepSzb src, PtrStepSzb dst, int ksize);
     template void calcBtvRegularization<4>(PtrStepSzb src, PtrStepSzb dst, int ksize);
 }
+
+#endif
diff --git a/modules/superres/src/input_array_utility.cpp b/modules/superres/src/input_array_utility.cpp
index 327d2db5e..c2850206b 100644
--- a/modules/superres/src/input_array_utility.cpp
+++ b/modules/superres/src/input_array_utility.cpp
@@ -177,7 +177,7 @@ namespace
         switch (src.kind())
         {
         case _InputArray::GPU_MAT:
-            #ifdef HAVE_OPENCV_GPU
+            #ifdef HAVE_OPENCV_GPUIMGPROC
                 gpu::cvtColor(src.getGpuMat(), dst.getGpuMatRef(), code, cn);
             #else
                 CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform");
diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp
index 713ed0122..242b91ee6 100644
--- a/modules/superres/src/optical_flow.cpp
+++ b/modules/superres/src/optical_flow.cpp
@@ -343,7 +343,7 @@ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1()
 ///////////////////////////////////////////////////////////////////
 // GpuOpticalFlow
 
-#ifndef HAVE_OPENCV_GPU
+#ifndef HAVE_OPENCV_GPUOPTFLOW
 
 Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Farneback_GPU()
 {
@@ -369,7 +369,7 @@ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_PyrLK_GPU()
     return Ptr<DenseOpticalFlowExt>();
 }
 
-#else // HAVE_OPENCV_GPU
+#else // HAVE_OPENCV_GPUOPTFLOW
 
 namespace
 {
@@ -717,4 +717,4 @@ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_GPU()
     return new DualTVL1_GPU;
 }
 
-#endif // HAVE_OPENCV_GPU
+#endif // HAVE_OPENCV_GPUOPTFLOW
diff --git a/modules/superres/src/precomp.hpp b/modules/superres/src/precomp.hpp
index 960d9b71c..73074c420 100644
--- a/modules/superres/src/precomp.hpp
+++ b/modules/superres/src/precomp.hpp
@@ -55,9 +55,26 @@
 #include "opencv2/video/tracking.hpp"
 #include "opencv2/core/private.hpp"
 
-#ifdef HAVE_OPENCV_GPU
-#  include "opencv2/gpu.hpp"
-#  include "opencv2/core/gpu_private.hpp"
+#include "opencv2/core/gpu_private.hpp"
+
+#ifdef HAVE_OPENCV_GPUARITHM
+#  include "opencv2/gpuarithm.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUWARPING
+#  include "opencv2/gpuwarping.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUFILTERS
+#  include "opencv2/gpufilters.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUIMGPROC
+#  include "opencv2/gpuimgproc.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUOPTFLOW
+#  include "opencv2/gpuoptflow.hpp"
 #endif
 
 #ifdef HAVE_OPENCV_GPUCODEC
diff --git a/modules/superres/test/test_superres.cpp b/modules/superres/test/test_superres.cpp
index b4a546c62..07abd04fa 100644
--- a/modules/superres/test/test_superres.cpp
+++ b/modules/superres/test/test_superres.cpp
@@ -268,7 +268,7 @@ TEST_F(SuperResolution, BTVL1)
     RunTest(cv::superres::createSuperResolution_BTVL1());
 }
 
-#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
+#if defined(HAVE_CUDA) && defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUWARPING) && defined(HAVE_OPENCV_GPUFILTERS)
 
 TEST_F(SuperResolution, BTVL1_GPU)
 {

From fdeec6896c4084f72ead53fe600eb34512cc7a0e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 13:28:30 +0400
Subject: [PATCH 42/49] stitching module fixes

---
 modules/stitching/CMakeLists.txt              |  3 ++-
 .../opencv2/stitching/detail/matchers.hpp     |  7 +++---
 .../opencv2/stitching/detail/warpers.hpp      |  6 ++---
 .../include/opencv2/stitching/warpers.hpp     |  2 +-
 modules/stitching/src/blenders.cpp            | 10 ++++----
 modules/stitching/src/matchers.cpp            | 23 ++++++++++---------
 modules/stitching/src/precomp.hpp             | 20 +++++++++++++---
 modules/stitching/src/stitcher.cpp            |  2 +-
 modules/stitching/src/warpers.cpp             |  2 +-
 samples/cpp/stitching_detailed.cpp            |  4 ++--
 10 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/modules/stitching/CMakeLists.txt b/modules/stitching/CMakeLists.txt
index 647d8b151..fa9adfc8e 100644
--- a/modules/stitching/CMakeLists.txt
+++ b/modules/stitching/CMakeLists.txt
@@ -1,3 +1,4 @@
 set(the_description "Images stitching")
-ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_gpuarithm opencv_gpufilters opencv_nonfree)
+ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect
+                  OPTIONAL opencv_gpu opencv_gpuarithm opencv_gpuwarping opencv_gpufilters opencv_gpufeatures2d opencv_nonfree) # gpuwarping: used by the HAVE_OPENCV_GPUWARPING warper/blender code
 
diff --git a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp
index 7461a67c3..0b24e784f 100644
--- a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp
@@ -47,8 +47,9 @@
 #include "opencv2/features2d.hpp"
 
 #include "opencv2/opencv_modules.hpp"
-#if defined(HAVE_OPENCV_NONFREE) && defined(HAVE_OPENCV_GPU)
-#include "opencv2/nonfree/gpu.hpp"
+
+#ifdef HAVE_OPENCV_NONFREE
+#  include "opencv2/nonfree/gpu.hpp"
 #endif
 
 namespace cv {
@@ -103,7 +104,7 @@ private:
 };
 
 
-#if defined(HAVE_OPENCV_NONFREE) && defined(HAVE_OPENCV_GPU)
+#ifdef HAVE_OPENCV_NONFREE
 class CV_EXPORTS SurfFeaturesFinderGpu : public FeaturesFinder
 {
 public:
diff --git a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
index 693363d59..55b4d3a8f 100644
--- a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
@@ -44,11 +44,9 @@
 #define __OPENCV_STITCHING_WARPERS_HPP__
 
 #include "opencv2/core.hpp"
+#include "opencv2/core/gpumat.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/opencv_modules.hpp"
-#ifdef HAVE_OPENCV_GPU
-# include "opencv2/gpu.hpp"
-#endif
 
 namespace cv {
 namespace detail {
@@ -329,7 +327,7 @@ public:
 };
 
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUWARPING
 class CV_EXPORTS PlaneWarperGpu : public PlaneWarper
 {
 public:
diff --git a/modules/stitching/include/opencv2/stitching/warpers.hpp b/modules/stitching/include/opencv2/stitching/warpers.hpp
index 7475d1304..9d62d356b 100644
--- a/modules/stitching/include/opencv2/stitching/warpers.hpp
+++ b/modules/stitching/include/opencv2/stitching/warpers.hpp
@@ -145,7 +145,7 @@ public:
 
 
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUWARPING
 class PlaneWarperGpu: public WarperCreator
 {
 public:
diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp
index 0a534643e..0043b7348 100644
--- a/modules/stitching/src/blenders.cpp
+++ b/modules/stitching/src/blenders.cpp
@@ -187,12 +187,14 @@ Rect FeatherBlender::createWeightMaps(const std::vector<Mat> &masks, const std::
 MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type)
 {
     setNumBands(num_bands);
-#ifdef HAVE_OPENCV_GPU
+
+#if defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUWARPING)
     can_use_gpu_ = try_gpu && gpu::getCudaEnabledDeviceCount();
 #else
-    (void)try_gpu;
+    (void) try_gpu;
     can_use_gpu_ = false;
 #endif
+
     CV_Assert(weight_type == CV_32F || weight_type == CV_16S);
     weight_type_ = weight_type;
 }
@@ -489,7 +491,7 @@ void createLaplacePyr(const Mat &img, int num_levels, std::vector<Mat> &pyr)
 
 void createLaplacePyrGpu(const Mat &img, int num_levels, std::vector<Mat> &pyr)
 {
-#ifdef HAVE_OPENCV_GPU
+#if defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUWARPING)
     pyr.resize(num_levels + 1);
 
     std::vector<gpu::GpuMat> gpu_pyr(num_levels + 1);
@@ -529,7 +531,7 @@ void restoreImageFromLaplacePyr(std::vector<Mat> &pyr)
 
 void restoreImageFromLaplacePyrGpu(std::vector<Mat> &pyr)
 {
-#ifdef HAVE_OPENCV_GPU
+#if defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUWARPING)
     if (pyr.empty())
         return;
 
diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp
index 3a48711a8..fa1f51e41 100644
--- a/modules/stitching/src/matchers.cpp
+++ b/modules/stitching/src/matchers.cpp
@@ -44,10 +44,7 @@
 
 using namespace cv;
 using namespace cv::detail;
-
-#ifdef HAVE_OPENCV_GPU
 using namespace cv::gpu;
-#endif
 
 #ifdef HAVE_OPENCV_NONFREE
 #include "opencv2/nonfree.hpp"
@@ -132,7 +129,7 @@ private:
     float match_conf_;
 };
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUFEATURES2D
 class GpuMatcher : public FeaturesMatcher
 {
 public:
@@ -207,7 +204,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
     LOG("1->2 & 2->1 matches: " << matches_info.matches.size() << endl);
 }
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUFEATURES2D
 void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &features2, MatchesInfo& matches_info)
 {
     matches_info.matches.clear();
@@ -435,7 +432,7 @@ void OrbFeaturesFinder::find(const Mat &image, ImageFeatures &features)
     }
 }
 
-#if defined(HAVE_OPENCV_NONFREE) && defined(HAVE_OPENCV_GPU)
+#ifdef HAVE_OPENCV_NONFREE
 SurfFeaturesFinderGpu::SurfFeaturesFinderGpu(double hess_thresh, int num_octaves, int num_layers,
                                              int num_octaves_descr, int num_layers_descr)
 {
@@ -536,14 +533,18 @@ void FeaturesMatcher::operator ()(const std::vector<ImageFeatures> &features, st
 
 BestOf2NearestMatcher::BestOf2NearestMatcher(bool try_use_gpu, float match_conf, int num_matches_thresh1, int num_matches_thresh2)
 {
-#ifdef HAVE_OPENCV_GPU
-    if (try_use_gpu && getCudaEnabledDeviceCount() > 0)
-        impl_ = new GpuMatcher(match_conf);
-    else
-#else
     (void)try_use_gpu;
+
+#ifdef HAVE_OPENCV_GPUFEATURES2D
+    if (try_use_gpu && getCudaEnabledDeviceCount() > 0)
+    {
+        impl_ = new GpuMatcher(match_conf);
+    }
+    else
 #endif
+    {
         impl_ = new CpuMatcher(match_conf);
+    }
 
     is_thread_safe_ = impl_->isThreadSafe();
     num_matches_thresh1_ = num_matches_thresh1;
diff --git a/modules/stitching/src/precomp.hpp b/modules/stitching/src/precomp.hpp
index 9e056bc0f..d802d0dcc 100644
--- a/modules/stitching/src/precomp.hpp
+++ b/modules/stitching/src/precomp.hpp
@@ -66,11 +66,25 @@
 #include "opencv2/imgproc.hpp"
 #include "opencv2/features2d.hpp"
 #include "opencv2/calib3d.hpp"
+
+#ifdef HAVE_OPENCV_GPUARITHM
+#  include "opencv2/gpuarithm.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUWARPING
+#  include "opencv2/gpuwarping.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPUFEATURES2D
+#  include "opencv2/gpufeatures2d.hpp"
+#endif
+
 #ifdef HAVE_OPENCV_GPU
 #  include "opencv2/gpu.hpp"
-#  ifdef HAVE_OPENCV_NONFREE
-#    include "opencv2/nonfree/gpu.hpp"
-#  endif
+#endif
+
+#ifdef HAVE_OPENCV_NONFREE
+#  include "opencv2/nonfree/gpu.hpp"
 #endif
 
 #include "../../imgproc/src/gcgraph.hpp"
diff --git a/modules/stitching/src/stitcher.cpp b/modules/stitching/src/stitcher.cpp
index 72f6e67ed..cef9fe4e9 100644
--- a/modules/stitching/src/stitcher.cpp
+++ b/modules/stitching/src/stitcher.cpp
@@ -59,7 +59,7 @@ Stitcher Stitcher::createDefault(bool try_use_gpu)
 #ifdef HAVE_OPENCV_GPU
     if (try_use_gpu && gpu::getCudaEnabledDeviceCount() > 0)
     {
-#if defined(HAVE_OPENCV_NONFREE)
+#ifdef HAVE_OPENCV_NONFREE
         stitcher.setFeaturesFinder(new detail::SurfFeaturesFinderGpu());
 #else
         stitcher.setFeaturesFinder(new detail::OrbFeaturesFinder());
diff --git a/modules/stitching/src/warpers.cpp b/modules/stitching/src/warpers.cpp
index 0df91edc6..1082b42d7 100644
--- a/modules/stitching/src/warpers.cpp
+++ b/modules/stitching/src/warpers.cpp
@@ -210,7 +210,7 @@ void SphericalWarper::detectResultRoi(Size src_size, Point &dst_tl, Point &dst_b
 }
 
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUWARPING
 Rect PlaneWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, gpu::GpuMat &xmap, gpu::GpuMat &ymap)
 {
     return buildMaps(src_size, K, R, Mat::zeros(3, 1, CV_32F), xmap, ymap);
diff --git a/samples/cpp/stitching_detailed.cpp b/samples/cpp/stitching_detailed.cpp
index e79386df3..5ee79184d 100644
--- a/samples/cpp/stitching_detailed.cpp
+++ b/samples/cpp/stitching_detailed.cpp
@@ -356,7 +356,7 @@ int main(int argc, char* argv[])
     Ptr<FeaturesFinder> finder;
     if (features_type == "surf")
     {
-#if defined(HAVE_OPENCV_NONFREE) && defined(HAVE_OPENCV_GPU)
+#ifdef HAVE_OPENCV_NONFREE
         if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
             finder = new SurfFeaturesFinderGpu();
         else
@@ -544,7 +544,7 @@ int main(int argc, char* argv[])
     // Warp images and their masks
 
     Ptr<WarperCreator> warper_creator;
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUWARPING
     if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
     {
         if (warp_type == "plane") warper_creator = new cv::PlaneWarperGpu();

From 13d087e62d1953134cc0bc6326fb1017dcf8ee52 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 11:30:27 +0400
Subject: [PATCH 43/49] videostab module fixes

---
 modules/videostab/CMakeLists.txt                      |  3 ++-
 .../include/opencv2/videostab/global_motion.hpp       | 10 ++++++----
 .../include/opencv2/videostab/optical_flow.hpp        |  8 +++++---
 .../include/opencv2/videostab/wobble_suppression.hpp  |  5 +----
 modules/videostab/src/global_motion.cpp               | 10 ++++++++--
 modules/videostab/src/inpainting.cpp                  |  2 +-
 modules/videostab/src/optical_flow.cpp                |  6 ++++--
 modules/videostab/src/wobble_suppression.cpp          | 11 ++++++++++-
 samples/cpp/videostab.cpp                             |  4 ++--
 9 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/modules/videostab/CMakeLists.txt b/modules/videostab/CMakeLists.txt
index edde3f86d..e9e62815f 100644
--- a/modules/videostab/CMakeLists.txt
+++ b/modules/videostab/CMakeLists.txt
@@ -1,3 +1,4 @@
 set(the_description "Video stabilization")
-ocv_define_module(videostab opencv_imgproc opencv_features2d opencv_video opencv_photo opencv_calib3d OPTIONAL opencv_gpu opencv_highgui)
+ocv_define_module(videostab opencv_imgproc opencv_features2d opencv_video opencv_photo opencv_calib3d
+                  OPTIONAL opencv_gpu opencv_gpuwarping opencv_gpuimgproc opencv_gpuoptflow opencv_highgui) # gpuimgproc: header is included by global_motion.hpp under HAVE_OPENCV_GPUIMGPROC
 
diff --git a/modules/videostab/include/opencv2/videostab/global_motion.hpp b/modules/videostab/include/opencv2/videostab/global_motion.hpp
index 0f4a9cfa7..58b831b37 100644
--- a/modules/videostab/include/opencv2/videostab/global_motion.hpp
+++ b/modules/videostab/include/opencv2/videostab/global_motion.hpp
@@ -52,8 +52,8 @@
 #include "opencv2/videostab/motion_core.hpp"
 #include "opencv2/videostab/outlier_rejection.hpp"
 
-#ifdef HAVE_OPENCV_GPU
-  #include "opencv2/gpu.hpp"
+#ifdef HAVE_OPENCV_GPUIMGPROC
+#  include "opencv2/gpuimgproc.hpp"
 #endif
 
 namespace cv
@@ -199,7 +199,8 @@ private:
     std::vector<Point2f> pointsPrevGood_, pointsGood_;
 };
 
-#ifdef HAVE_OPENCV_GPU
+#if defined(HAVE_OPENCV_GPUIMGPROC) && defined(HAVE_OPENCV_GPU) && defined(HAVE_OPENCV_GPUOPTFLOW)
+
 class CV_EXPORTS KeypointBasedMotionEstimatorGpu : public ImageMotionEstimatorBase
 {
 public:
@@ -228,7 +229,8 @@ private:
     std::vector<Point2f> hostPointsPrevTmp_, hostPointsTmp_;
     std::vector<uchar> rejectionStatus_;
 };
-#endif
+
+#endif // defined(HAVE_OPENCV_GPUIMGPROC) && defined(HAVE_OPENCV_GPU) && defined(HAVE_OPENCV_GPUOPTFLOW)
 
 CV_EXPORTS Mat getMotion(int from, int to, const std::vector<Mat> &motions);
 
diff --git a/modules/videostab/include/opencv2/videostab/optical_flow.hpp b/modules/videostab/include/opencv2/videostab/optical_flow.hpp
index 61f38fdb6..7509c1207 100644
--- a/modules/videostab/include/opencv2/videostab/optical_flow.hpp
+++ b/modules/videostab/include/opencv2/videostab/optical_flow.hpp
@@ -46,8 +46,8 @@
 #include "opencv2/core.hpp"
 #include "opencv2/opencv_modules.hpp"
 
-#ifdef HAVE_OPENCV_GPU
-  #include "opencv2/gpu.hpp"
+#ifdef HAVE_OPENCV_GPUOPTFLOW
+  #include "opencv2/gpuoptflow.hpp"
 #endif
 
 namespace cv
@@ -99,7 +99,8 @@ public:
             OutputArray status, OutputArray errors);
 };
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUOPTFLOW
+
 class CV_EXPORTS SparsePyrLkOptFlowEstimatorGpu
         : public PyrLkOptFlowEstimatorBase, public ISparseOptFlowEstimator
 {
@@ -135,6 +136,7 @@ private:
     gpu::PyrLKOpticalFlow optFlowEstimator_;
     gpu::GpuMat frame0_, frame1_, flowX_, flowY_, errors_;
 };
+
 #endif
 
 } // namespace videostab
diff --git a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
index f38de8504..987a19b1b 100644
--- a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
+++ b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
@@ -45,13 +45,10 @@
 
 #include <vector>
 #include "opencv2/core.hpp"
+#include "opencv2/core/gpumat.hpp"
 #include "opencv2/videostab/global_motion.hpp"
 #include "opencv2/videostab/log.hpp"
 
-#ifdef HAVE_OPENCV_GPU
-  #include "opencv2/gpu.hpp"
-#endif
-
 namespace cv
 {
 namespace videostab
diff --git a/modules/videostab/src/global_motion.cpp b/modules/videostab/src/global_motion.cpp
index fd56e2b15..45e2d164e 100644
--- a/modules/videostab/src/global_motion.cpp
+++ b/modules/videostab/src/global_motion.cpp
@@ -47,6 +47,10 @@
 #include "opencv2/opencv_modules.hpp"
 #include "clp.hpp"
 
+#ifdef HAVE_OPENCV_GPU
+#  include "opencv2/gpu.hpp"
+#endif
+
 namespace cv
 {
 namespace videostab
@@ -728,7 +732,8 @@ Mat KeypointBasedMotionEstimator::estimate(const Mat &frame0, const Mat &frame1,
 }
 
 
-#ifdef HAVE_OPENCV_GPU
+#if defined(HAVE_OPENCV_GPUIMGPROC) && defined(HAVE_OPENCV_GPU) && defined(HAVE_OPENCV_GPUOPTFLOW)
+
 KeypointBasedMotionEstimatorGpu::KeypointBasedMotionEstimatorGpu(Ptr<MotionEstimatorBase> estimator)
     : ImageMotionEstimatorBase(estimator->motionModel()), motionEstimator_(estimator)
 {
@@ -799,7 +804,8 @@ Mat KeypointBasedMotionEstimatorGpu::estimate(const gpu::GpuMat &frame0, const g
     // estimate motion
     return motionEstimator_->estimate(hostPointsPrev_, hostPoints_, ok);
 }
-#endif // HAVE_OPENCV_GPU
+
+#endif // defined(HAVE_OPENCV_GPUIMGPROC) && defined(HAVE_OPENCV_GPU) && defined(HAVE_OPENCV_GPUOPTFLOW)
 
 
 Mat getMotion(int from, int to, const std::vector<Mat> &motions)
diff --git a/modules/videostab/src/inpainting.cpp b/modules/videostab/src/inpainting.cpp
index ada8792f1..b3092048f 100644
--- a/modules/videostab/src/inpainting.cpp
+++ b/modules/videostab/src/inpainting.cpp
@@ -323,7 +323,7 @@ public:
 
 MotionInpainter::MotionInpainter()
 {
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUOPTFLOW
     setOptFlowEstimator(new DensePyrLkOptFlowEstimatorGpu());
 #else
     CV_Error(Error::StsNotImplemented, "Current implementation of MotionInpainter requires GPU");
diff --git a/modules/videostab/src/optical_flow.cpp b/modules/videostab/src/optical_flow.cpp
index f5c8288e7..cee08823a 100644
--- a/modules/videostab/src/optical_flow.cpp
+++ b/modules/videostab/src/optical_flow.cpp
@@ -58,7 +58,8 @@ void SparsePyrLkOptFlowEstimator::run(
 }
 
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUOPTFLOW
+
 SparsePyrLkOptFlowEstimatorGpu::SparsePyrLkOptFlowEstimatorGpu()
 {
     CV_Assert(gpu::getCudaEnabledDeviceCount() > 0);
@@ -133,7 +134,8 @@ void DensePyrLkOptFlowEstimatorGpu::run(
     flowX_.download(flowX.getMatRef());
     flowY_.download(flowY.getMatRef());
 }
-#endif // HAVE_OPENCV_GPU
+
+#endif // HAVE_OPENCV_GPUOPTFLOW
 
 } // namespace videostab
 } // namespace cv
diff --git a/modules/videostab/src/wobble_suppression.cpp b/modules/videostab/src/wobble_suppression.cpp
index 3c48df58f..049bb81b3 100644
--- a/modules/videostab/src/wobble_suppression.cpp
+++ b/modules/videostab/src/wobble_suppression.cpp
@@ -44,6 +44,15 @@
 #include "opencv2/videostab/wobble_suppression.hpp"
 #include "opencv2/videostab/ring_buffer.hpp"
 
+#ifdef HAVE_OPENCV_GPUWARPING
+#  include "opencv2/gpuwarping.hpp"
+#endif
+
+#ifdef HAVE_OPENCV_GPU
+#  include "opencv2/gpu.hpp"
+#endif
+
+
 namespace cv
 {
 namespace videostab
@@ -113,7 +122,7 @@ void MoreAccurateMotionWobbleSuppressor::suppress(int idx, const Mat &frame, Mat
 }
 
 
-#ifdef HAVE_OPENCV_GPU
+#ifdef HAVE_OPENCV_GPUWARPING
 void MoreAccurateMotionWobbleSuppressorGpu::suppress(int idx, const gpu::GpuMat &frame, gpu::GpuMat &result)
 {
     CV_Assert(motions_ && stabilizationMotions_);
diff --git a/samples/cpp/videostab.cpp b/samples/cpp/videostab.cpp
index 686b3636f..21606d495 100644
--- a/samples/cpp/videostab.cpp
+++ b/samples/cpp/videostab.cpp
@@ -216,7 +216,7 @@ public:
             outlierRejector = tblor;
         }
 
-#ifdef HAVE_OPENCV_GPU
+#if defined(HAVE_OPENCV_GPUIMGPROC) && defined(HAVE_OPENCV_GPU) && defined(HAVE_OPENCV_GPUOPTFLOW)
         if (gpu)
         {
             KeypointBasedMotionEstimatorGpu *kbest = new KeypointBasedMotionEstimatorGpu(est);
@@ -257,7 +257,7 @@ public:
             outlierRejector = tblor;
         }
 
-#ifdef HAVE_OPENCV_GPU
+#if defined(HAVE_OPENCV_GPUIMGPROC) && defined(HAVE_OPENCV_GPU) && defined(HAVE_OPENCV_GPUOPTFLOW)
         if (gpu)
         {
             KeypointBasedMotionEstimatorGpu *kbest = new KeypointBasedMotionEstimatorGpu(est);

From 5ee9d3111df8a873970e9742a2243e24be3bf342 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 12 Apr 2013 12:36:50 +0400
Subject: [PATCH 44/49] fixed gpu tutorial

---
 .../gpu/gpu-basics-similarity/gpu-basics-similarity.cpp      | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
index f3f2b683d..3a0f99e49 100644
--- a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
+++ b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
@@ -5,7 +5,10 @@
 #include <opencv2/core/utility.hpp>
 #include <opencv2/imgproc.hpp>// Image processing methods for the CPU
 #include <opencv2/highgui.hpp>// Read images
-#include <opencv2/gpu.hpp>        // GPU structures and methods
+
+// GPU structures and methods
+#include <opencv2/gpuarithm.hpp>
+#include <opencv2/gpufilters.hpp>
 
 using namespace std;
 using namespace cv;

From 0ce10532b3b98377a42d111884b0d8c238b67424 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 12 Apr 2013 17:17:25 +0400
Subject: [PATCH 45/49] softcascade module fixes

---
 modules/softcascade/src/detector_cuda.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/softcascade/src/detector_cuda.cpp b/modules/softcascade/src/detector_cuda.cpp
index 5c7aba185..3e7795df8 100644
--- a/modules/softcascade/src/detector_cuda.cpp
+++ b/modules/softcascade/src/detector_cuda.cpp
@@ -537,7 +537,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
             flds.createLevels(image.rows, image.cols);
 
         flds.preprocessor->apply(image, flds.shrunk);
-        integral(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
+        ::integral(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
     }
     else
     {

From 6d735c11c651a71f39e8390c62691d2983aa6252 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 10:27:56 +0400
Subject: [PATCH 46/49] removed const qualifier (it produces warnings)

---
 .../opencv2/core/cuda/border_interpolate.hpp  | 58 +++++++++----------
 .../include/opencv2/core/cuda/filters.hpp     | 10 ++--
 .../include/opencv2/core/cuda/functional.hpp  | 26 ++++-----
 modules/gpuarithm/src/cuda/cmp_scalar.cu      |  8 +--
 modules/gpuarithm/src/cuda/math.cu            |  2 +-
 modules/gpuimgproc/src/cuda/canny.cu          |  4 +-
 6 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/modules/core/include/opencv2/core/cuda/border_interpolate.hpp b/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
index 1347a2f84..6c53f09ee 100644
--- a/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
+++ b/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
@@ -73,8 +73,8 @@ namespace cv { namespace gpu { namespace cudev
             return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
         }
 
-        const int width;
-        const D val;
+        int width;
+        D val;
     };
 
     template <typename D> struct BrdColConstant
@@ -98,8 +98,8 @@ namespace cv { namespace gpu { namespace cudev
             return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
         }
 
-        const int height;
-        const D val;
+        int height;
+        D val;
     };
 
     template <typename D> struct BrdConstant
@@ -120,9 +120,9 @@ namespace cv { namespace gpu { namespace cudev
             return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
         }
 
-        const int height;
-        const int width;
-        const D val;
+        int height;
+        int width;
+        D val;
     };
 
     //////////////////////////////////////////////////////////////
@@ -165,7 +165,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(data[idx_col(x)]);
         }
 
-        const int last_col;
+        int last_col;
     };
 
     template <typename D> struct BrdColReplicate
@@ -205,7 +205,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
         }
 
-        const int last_row;
+        int last_row;
     };
 
     template <typename D> struct BrdReplicate
@@ -255,8 +255,8 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(src(idx_row(y), idx_col(x)));
         }
 
-        const int last_row;
-        const int last_col;
+        int last_row;
+        int last_col;
     };
 
     //////////////////////////////////////////////////////////////
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(data[idx_col(x)]);
         }
 
-        const int last_col;
+        int last_col;
     };
 
     template <typename D> struct BrdColReflect101
@@ -339,7 +339,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
         }
 
-        const int last_row;
+        int last_row;
     };
 
     template <typename D> struct BrdReflect101
@@ -389,8 +389,8 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(src(idx_row(y), idx_col(x)));
         }
 
-        const int last_row;
-        const int last_col;
+        int last_row;
+        int last_col;
     };
 
     //////////////////////////////////////////////////////////////
@@ -433,7 +433,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(data[idx_col(x)]);
         }
 
-        const int last_col;
+        int last_col;
     };
 
     template <typename D> struct BrdColReflect
@@ -473,7 +473,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
         }
 
-        const int last_row;
+        int last_row;
     };
 
     template <typename D> struct BrdReflect
@@ -523,8 +523,8 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(src(idx_row(y), idx_col(x)));
         }
 
-        const int last_row;
-        const int last_col;
+        int last_row;
+        int last_col;
     };
 
     //////////////////////////////////////////////////////////////
@@ -567,7 +567,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(data[idx_col(x)]);
         }
 
-        const int width;
+        int width;
     };
 
     template <typename D> struct BrdColWrap
@@ -607,7 +607,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
         }
 
-        const int height;
+        int height;
     };
 
     template <typename D> struct BrdWrap
@@ -664,8 +664,8 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<D>(src(idx_row(y), idx_col(x)));
         }
 
-        const int height;
-        const int width;
+        int height;
+        int width;
     };
 
     //////////////////////////////////////////////////////////////
@@ -683,8 +683,8 @@ namespace cv { namespace gpu { namespace cudev
             return b.at(y, x, ptr);
         }
 
-        const Ptr2D ptr;
-        const B b;
+        Ptr2D ptr;
+        B b;
     };
 
     // under win32 there is some bug with templated types that passed as kernel parameters
@@ -704,10 +704,10 @@ namespace cv { namespace gpu { namespace cudev
             return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
         }
 
-        const Ptr2D src;
-        const int height;
-        const int width;
-        const D val;
+        Ptr2D src;
+        int height;
+        int width;
+        D val;
     };
 }}} // namespace cv { namespace gpu { namespace cudev
 
diff --git a/modules/core/include/opencv2/core/cuda/filters.hpp b/modules/core/include/opencv2/core/cuda/filters.hpp
index 19a8c5883..f35f662e8 100644
--- a/modules/core/include/opencv2/core/cuda/filters.hpp
+++ b/modules/core/include/opencv2/core/cuda/filters.hpp
@@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace cudev
             return src(__float2int_rz(y), __float2int_rz(x));
         }
 
-        const Ptr2D src;
+        Ptr2D src;
     };
 
     template <typename Ptr2D> struct LinearFilter
@@ -107,7 +107,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<elem_type>(out);
         }
 
-        const Ptr2D src;
+        Ptr2D src;
     };
 
     template <typename Ptr2D> struct CubicFilter
@@ -166,7 +166,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<elem_type>(res);
         }
 
-        const Ptr2D src;
+        Ptr2D src;
     };
     // for integer scaling
     template <typename Ptr2D> struct IntegerAreaFilter
@@ -203,7 +203,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<elem_type>(out);
         }
 
-        const Ptr2D src;
+        Ptr2D src;
         float scale_x, scale_y ,scale;
     };
 
@@ -269,7 +269,7 @@ namespace cv { namespace gpu { namespace cudev
             return saturate_cast<elem_type>(out);
         }
 
-        const Ptr2D src;
+        Ptr2D src;
         float scale_x, scale_y;
         int width, haight;
     };
diff --git a/modules/core/include/opencv2/core/cuda/functional.hpp b/modules/core/include/opencv2/core/cuda/functional.hpp
index 506ccd876..95706ca44 100644
--- a/modules/core/include/opencv2/core/cuda/functional.hpp
+++ b/modules/core/include/opencv2/core/cuda/functional.hpp
@@ -552,8 +552,8 @@ namespace cv { namespace gpu { namespace cudev
 
         __device__ __forceinline__ thresh_binary_func():unary_function<T, T>(){}
 
-        const T thresh;
-        const T maxVal;
+        T thresh;
+        T maxVal;
     };
 
     template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
@@ -570,8 +570,8 @@ namespace cv { namespace gpu { namespace cudev
 
         __device__ __forceinline__ thresh_binary_inv_func():unary_function<T, T>(){}
 
-        const T thresh;
-        const T maxVal;
+        T thresh;
+        T maxVal;
     };
 
     template <typename T> struct thresh_trunc_func : unary_function<T, T>
@@ -588,7 +588,7 @@ namespace cv { namespace gpu { namespace cudev
 
         __device__ __forceinline__ thresh_trunc_func():unary_function<T, T>(){}
 
-        const T thresh;
+        T thresh;
     };
 
     template <typename T> struct thresh_to_zero_func : unary_function<T, T>
@@ -604,7 +604,7 @@ namespace cv { namespace gpu { namespace cudev
 
         __device__ __forceinline__ thresh_to_zero_func():unary_function<T, T>(){}
 
-        const T thresh;
+        T thresh;
     };
 
     template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
@@ -620,7 +620,7 @@ namespace cv { namespace gpu { namespace cudev
 
         __device__ __forceinline__ thresh_to_zero_inv_func():unary_function<T, T>(){}
 
-        const T thresh;
+        T thresh;
     };
 //bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============>
     // Function Object Adaptors
@@ -636,7 +636,7 @@ namespace cv { namespace gpu { namespace cudev
         __device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function<typename Predicate::argument_type, bool>(){}
         __device__ __forceinline__ unary_negate() : unary_function<typename Predicate::argument_type, bool>(){}
 
-      const Predicate pred;
+      Predicate pred;
     };
 
     template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
@@ -659,7 +659,7 @@ namespace cv { namespace gpu { namespace cudev
         __device__ __forceinline__ binary_negate() :
         binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
 
-        const Predicate pred;
+        Predicate pred;
     };
 
     template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
@@ -679,8 +679,8 @@ namespace cv { namespace gpu { namespace cudev
         __device__ __forceinline__ binder1st(const binder1st& other) :
         unary_function<typename Op::second_argument_type, typename Op::result_type>(){}
 
-        const Op op;
-        const typename Op::first_argument_type arg1;
+        Op op;
+        typename Op::first_argument_type arg1;
     };
 
     template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
@@ -700,8 +700,8 @@ namespace cv { namespace gpu { namespace cudev
          __device__ __forceinline__ binder2nd(const binder2nd& other) :
         unary_function<typename Op::first_argument_type, typename Op::result_type>(), op(other.op), arg2(other.arg2){}
 
-        const Op op;
-        const typename Op::second_argument_type arg2;
+        Op op;
+        typename Op::second_argument_type arg2;
     };
 
     template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
diff --git a/modules/gpuarithm/src/cuda/cmp_scalar.cu b/modules/gpuarithm/src/cuda/cmp_scalar.cu
index 09ff0e6e7..678f253ea 100644
--- a/modules/gpuarithm/src/cuda/cmp_scalar.cu
+++ b/modules/gpuarithm/src/cuda/cmp_scalar.cu
@@ -72,7 +72,7 @@ namespace arithm
     template <class Op, typename T>
     struct CmpScalar<Op, T, 1> : unary_function<T, uchar>
     {
-        const T val;
+        T val;
 
         __host__ explicit CmpScalar(T val_) : val(val_) {}
 
@@ -85,7 +85,7 @@ namespace arithm
     template <class Op, typename T>
     struct CmpScalar<Op, T, 2> : unary_function<TYPE_VEC(T, 2), TYPE_VEC(uchar, 2)>
     {
-        const TYPE_VEC(T, 2) val;
+        TYPE_VEC(T, 2) val;
 
         __host__ explicit CmpScalar(TYPE_VEC(T, 2) val_) : val(val_) {}
 
@@ -98,7 +98,7 @@ namespace arithm
     template <class Op, typename T>
     struct CmpScalar<Op, T, 3> : unary_function<TYPE_VEC(T, 3), TYPE_VEC(uchar, 3)>
     {
-        const TYPE_VEC(T, 3) val;
+        TYPE_VEC(T, 3) val;
 
         __host__ explicit CmpScalar(TYPE_VEC(T, 3) val_) : val(val_) {}
 
@@ -111,7 +111,7 @@ namespace arithm
     template <class Op, typename T>
     struct CmpScalar<Op, T, 4> : unary_function<TYPE_VEC(T, 4), TYPE_VEC(uchar, 4)>
     {
-        const TYPE_VEC(T, 4) val;
+        TYPE_VEC(T, 4) val;
 
         __host__ explicit CmpScalar(TYPE_VEC(T, 4) val_) : val(val_) {}
 
diff --git a/modules/gpuarithm/src/cuda/math.cu b/modules/gpuarithm/src/cuda/math.cu
index 30093d51c..86be98ea0 100644
--- a/modules/gpuarithm/src/cuda/math.cu
+++ b/modules/gpuarithm/src/cuda/math.cu
@@ -253,7 +253,7 @@ namespace arithm
     };
     template<> struct PowOp<float> : unary_function<float, float>
     {
-        const float power;
+        float power;
 
         PowOp(double power_) : power(static_cast<float>(power_)) {}
 
diff --git a/modules/gpuimgproc/src/cuda/canny.cu b/modules/gpuimgproc/src/cuda/canny.cu
index 042e9afcc..77b185a4c 100644
--- a/modules/gpuimgproc/src/cuda/canny.cu
+++ b/modules/gpuimgproc/src/cuda/canny.cu
@@ -94,8 +94,8 @@ namespace canny
     texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
     struct SrcTex
     {
-        const int xoff;
-        const int yoff;
+        int xoff;
+        int yoff;
         __host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
 
         __device__ __forceinline__ int operator ()(int y, int x) const

From 3be997cfa96b666182af55e9cb934d0576fd6dd3 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 11:30:54 +0400
Subject: [PATCH 47/49] gpulegacy module fixes

---
 modules/gpulegacy/CMakeLists.txt              |  2 +-
 .../include/opencv2/gpulegacy/NCVPyramid.hpp  |  4 +-
 .../src/cuda/NCVHaarObjectDetection.cu        |  3 +-
 modules/gpulegacy/src/cuda/NCVPyramid.cu      | 52 ++++++++++++-------
 modules/gpulegacy/test/main_nvidia.cpp        |  4 +-
 modules/gpuwarping/src/pyramids.cpp           | 38 +-------------
 6 files changed, 43 insertions(+), 60 deletions(-)

diff --git a/modules/gpulegacy/CMakeLists.txt b/modules/gpulegacy/CMakeLists.txt
index 6dd61bd5b..9aa9b3b3e 100644
--- a/modules/gpulegacy/CMakeLists.txt
+++ b/modules/gpulegacy/CMakeLists.txt
@@ -4,6 +4,6 @@ endif()
 
 set(the_description "GPU-accelerated Computer Vision (legacy)")
 
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wuninitialized)
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4130 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wuninitialized)
 
 ocv_define_module(gpulegacy opencv_core OPTIONAL opencv_objdetect)
diff --git a/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
index 8fda836fe..88e2296be 100644
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
@@ -52,8 +52,8 @@ namespace cv { namespace gpu { namespace cudev
 {
     namespace pyramid
     {
-        template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        CV_EXPORTS void downsampleX2(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream);
+        CV_EXPORTS void interpolateFrom1(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream);
     }
 }}}
 
diff --git a/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
index da34ba731..c8aaaeace 100644
--- a/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
+++ b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
@@ -66,6 +66,7 @@
 
 #ifdef HAVE_OPENCV_OBJDETECT
 #  include "opencv2/objdetect.hpp"
+#  include "opencv2/objdetect/objdetect_c.h"
 #endif
 
 #include "opencv2/gpulegacy/NCV.hpp"
@@ -2130,7 +2131,7 @@ static NCVStatus loadFromXML(const cv::String &filename,
     haar.ClassifierSize.height = 0;
     haar.bHasStumpsOnly = true;
     haar.bNeedsTiltedII = false;
-    Ncv32u curMaxTreeDepth;
+    Ncv32u curMaxTreeDepth = 0;
 
     std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
     haarStages.resize(0);
diff --git a/modules/gpulegacy/src/cuda/NCVPyramid.cu b/modules/gpulegacy/src/cuda/NCVPyramid.cu
index acc4441b1..d42b46bcb 100644
--- a/modules/gpulegacy/src/cuda/NCVPyramid.cu
+++ b/modules/gpulegacy/src/cuda/NCVPyramid.cu
@@ -223,17 +223,25 @@ namespace cv { namespace gpu { namespace cudev
                 cudaSafeCall( cudaDeviceSynchronize() );
         }
 
-        template void kernelDownsampleX2_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelDownsampleX2_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelDownsampleX2_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        void downsampleX2(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream)
+        { // Dispatches to the kernelDownsampleX2_gpu<T> instantiation selected by depth (table row) and cn (table column).
+            typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 
-        template void kernelDownsampleX2_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelDownsampleX2_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelDownsampleX2_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+            static const func_t funcs[6][4] =
+            {
+                {kernelDownsampleX2_gpu<uchar1>       , 0 /*kernelDownsampleX2_gpu<uchar2>*/ , kernelDownsampleX2_gpu<uchar3>      , kernelDownsampleX2_gpu<uchar4>      },
+                {0 /*kernelDownsampleX2_gpu<char1>*/  , 0 /*kernelDownsampleX2_gpu<char2>*/  , 0 /*kernelDownsampleX2_gpu<char3>*/ , 0 /*kernelDownsampleX2_gpu<char4>*/ },
+                {kernelDownsampleX2_gpu<ushort1>      , 0 /*kernelDownsampleX2_gpu<ushort2>*/, kernelDownsampleX2_gpu<ushort3>     , kernelDownsampleX2_gpu<ushort4>     },
+                {0 /*kernelDownsampleX2_gpu<short1>*/ , 0 /*kernelDownsampleX2_gpu<short2>*/ , 0 /*kernelDownsampleX2_gpu<short3>*/, 0 /*kernelDownsampleX2_gpu<short4>*/},
+                {0 /*kernelDownsampleX2_gpu<int1>*/   , 0 /*kernelDownsampleX2_gpu<int2>*/   , 0 /*kernelDownsampleX2_gpu<int3>*/  , 0 /*kernelDownsampleX2_gpu<int4>*/  },
+                {kernelDownsampleX2_gpu<float1>       , 0 /*kernelDownsampleX2_gpu<float2>*/ , kernelDownsampleX2_gpu<float3>      , kernelDownsampleX2_gpu<float4>      }
+            };
 
-        template void kernelDownsampleX2_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelDownsampleX2_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelDownsampleX2_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+            CV_Assert(depth >= 0 && depth < 6 && cn >= 1 && cn <= 4); // keep the funcs[6][4] lookup in bounds
+            const func_t func = funcs[depth][cn - 1];
+            CV_Assert(func != 0); // null entry: no kernel is instantiated for this depth/channel pair
+            func(src, dst, stream);
+        }
     }
 }}}
 
@@ -298,17 +306,25 @@ namespace cv { namespace gpu { namespace cudev
                 cudaSafeCall( cudaDeviceSynchronize() );
         }
 
-        template void kernelInterpolateFrom1_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelInterpolateFrom1_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelInterpolateFrom1_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+        void interpolateFrom1(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream)
+        { // Dispatches to the kernelInterpolateFrom1_gpu<T> instantiation selected by depth (table row) and cn (table column).
+            typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 
-        template void kernelInterpolateFrom1_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelInterpolateFrom1_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelInterpolateFrom1_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+            static const func_t funcs[6][4] =
+            {
+                {kernelInterpolateFrom1_gpu<uchar1>      , 0 /*kernelInterpolateFrom1_gpu<uchar2>*/ , kernelInterpolateFrom1_gpu<uchar3>      , kernelInterpolateFrom1_gpu<uchar4>      },
+                {0 /*kernelInterpolateFrom1_gpu<char1>*/ , 0 /*kernelInterpolateFrom1_gpu<char2>*/  , 0 /*kernelInterpolateFrom1_gpu<char3>*/ , 0 /*kernelInterpolateFrom1_gpu<char4>*/ },
+                {kernelInterpolateFrom1_gpu<ushort1>     , 0 /*kernelInterpolateFrom1_gpu<ushort2>*/, kernelInterpolateFrom1_gpu<ushort3>     , kernelInterpolateFrom1_gpu<ushort4>     },
+                {0 /*kernelInterpolateFrom1_gpu<short1>*/, 0 /*kernelInterpolateFrom1_gpu<short2>*/ , 0 /*kernelInterpolateFrom1_gpu<short3>*/, 0 /*kernelInterpolateFrom1_gpu<short4>*/},
+                {0 /*kernelInterpolateFrom1_gpu<int1>*/  , 0 /*kernelInterpolateFrom1_gpu<int2>*/   , 0 /*kernelInterpolateFrom1_gpu<int3>*/  , 0 /*kernelInterpolateFrom1_gpu<int4>*/  },
+                {kernelInterpolateFrom1_gpu<float1>      , 0 /*kernelInterpolateFrom1_gpu<float2>*/ , kernelInterpolateFrom1_gpu<float3>      , kernelInterpolateFrom1_gpu<float4>      }
+            };
 
-        template void kernelInterpolateFrom1_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelInterpolateFrom1_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-        template void kernelInterpolateFrom1_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
+            CV_Assert(depth >= 0 && depth < 6 && cn >= 1 && cn <= 4); // keep the funcs[6][4] lookup in bounds
+            const func_t func = funcs[depth][cn - 1];
+            CV_Assert(func != 0); // null entry: no kernel is instantiated for this depth/channel pair
+            func(src, dst, stream);
+        }
     }
 }}}
 
diff --git a/modules/gpulegacy/test/main_nvidia.cpp b/modules/gpulegacy/test/main_nvidia.cpp
index 1179b5b96..0c82a1abf 100644
--- a/modules/gpulegacy/test/main_nvidia.cpp
+++ b/modules/gpulegacy/test/main_nvidia.cpp
@@ -349,7 +349,7 @@ bool nvidia_NPPST_Resize(const std::string& test_data_path, OutputLevel outputLe
     NCVAutoTestLister testListerResize("NPPST Resize", outputLevel);
 
     NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 2048, 2048);
-    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, -1, 2048, 2048);
+    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, (Ncv64u) -1, 2048, 2048);
 
     generateResizeTests(testListerResize, testSrcRandom_32u);
     generateResizeTests(testListerResize, testSrcRandom_64u);
@@ -379,7 +379,7 @@ bool nvidia_NPPST_Transpose(const std::string& test_data_path, OutputLevel outpu
     NCVAutoTestLister testListerTranspose("NPPST Transpose", outputLevel);
 
     NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 2048, 2048);
-    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, -1, 2048, 2048);
+    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, (Ncv64u) -1, 2048, 2048);
 
     generateTransposeTests(testListerTranspose, testSrcRandom_32u);
     generateTransposeTests(testListerTranspose, testSrcRandom_64u);
diff --git a/modules/gpuwarping/src/pyramids.cpp b/modules/gpuwarping/src/pyramids.cpp
index 91b568d70..db9dd611a 100644
--- a/modules/gpuwarping/src/pyramids.cpp
+++ b/modules/gpuwarping/src/pyramids.cpp
@@ -140,25 +140,8 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre
     (void) stream;
     throw_no_cuda();
 #else
-    using namespace cv::gpu::cudev::pyramid;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-
-    static const func_t funcs[6][4] =
-    {
-        {kernelDownsampleX2_gpu<uchar1>       , 0 /*kernelDownsampleX2_gpu<uchar2>*/ , kernelDownsampleX2_gpu<uchar3>      , kernelDownsampleX2_gpu<uchar4>      },
-        {0 /*kernelDownsampleX2_gpu<char1>*/  , 0 /*kernelDownsampleX2_gpu<char2>*/  , 0 /*kernelDownsampleX2_gpu<char3>*/ , 0 /*kernelDownsampleX2_gpu<char4>*/ },
-        {kernelDownsampleX2_gpu<ushort1>      , 0 /*kernelDownsampleX2_gpu<ushort2>*/, kernelDownsampleX2_gpu<ushort3>     , kernelDownsampleX2_gpu<ushort4>     },
-        {0 /*kernelDownsampleX2_gpu<short1>*/ , 0 /*kernelDownsampleX2_gpu<short2>*/ , 0 /*kernelDownsampleX2_gpu<short3>*/, 0 /*kernelDownsampleX2_gpu<short4>*/},
-        {0 /*kernelDownsampleX2_gpu<int1>*/   , 0 /*kernelDownsampleX2_gpu<int2>*/   , 0 /*kernelDownsampleX2_gpu<int3>*/  , 0 /*kernelDownsampleX2_gpu<int4>*/  },
-        {kernelDownsampleX2_gpu<float1>       , 0 /*kernelDownsampleX2_gpu<float2>*/ , kernelDownsampleX2_gpu<float3>      , kernelDownsampleX2_gpu<float4>      }
-    };
-
     CV_Assert(img.depth() <= CV_32F && img.channels() <= 4);
 
-    const func_t func = funcs[img.depth()][img.channels() - 1];
-    CV_Assert(func != 0);
-
     layer0_ = img;
     Size szLastLayer = img.size();
     nLayers_ = 1;
@@ -180,7 +163,7 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre
 
         const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
 
-        func(prevLayer, pyramid_[i], StreamAccessor::getStream(stream));
+        cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
 
         szLastLayer = szCurLayer;
     }
@@ -195,27 +178,10 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
     (void) stream;
     throw_no_cuda();
 #else
-    using namespace cv::gpu::cudev::pyramid;
-
-    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
-
-    static const func_t funcs[6][4] =
-    {
-        {kernelInterpolateFrom1_gpu<uchar1>      , 0 /*kernelInterpolateFrom1_gpu<uchar2>*/ , kernelInterpolateFrom1_gpu<uchar3>      , kernelInterpolateFrom1_gpu<uchar4>      },
-        {0 /*kernelInterpolateFrom1_gpu<char1>*/ , 0 /*kernelInterpolateFrom1_gpu<char2>*/  , 0 /*kernelInterpolateFrom1_gpu<char3>*/ , 0 /*kernelInterpolateFrom1_gpu<char4>*/ },
-        {kernelInterpolateFrom1_gpu<ushort1>     , 0 /*kernelInterpolateFrom1_gpu<ushort2>*/, kernelInterpolateFrom1_gpu<ushort3>     , kernelInterpolateFrom1_gpu<ushort4>     },
-        {0 /*kernelInterpolateFrom1_gpu<short1>*/, 0 /*kernelInterpolateFrom1_gpu<short2>*/ , 0 /*kernelInterpolateFrom1_gpu<short3>*/, 0 /*kernelInterpolateFrom1_gpu<short4>*/},
-        {0 /*kernelInterpolateFrom1_gpu<int1>*/  , 0 /*kernelInterpolateFrom1_gpu<int2>*/   , 0 /*kernelInterpolateFrom1_gpu<int3>*/  , 0 /*kernelInterpolateFrom1_gpu<int4>*/  },
-        {kernelInterpolateFrom1_gpu<float1>      , 0 /*kernelInterpolateFrom1_gpu<float2>*/ , kernelInterpolateFrom1_gpu<float3>      , kernelInterpolateFrom1_gpu<float4>      }
-    };
-
     CV_Assert(outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0);
 
     ensureSizeIsEnough(outRoi, layer0_.type(), outImg);
 
-    const func_t func = funcs[outImg.depth()][outImg.channels() - 1];
-    CV_Assert(func != 0);
-
     if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
     {
         if (stream)
@@ -249,7 +215,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
         lastLayer = curLayer;
     }
 
-    func(lastLayer, outImg, StreamAccessor::getStream(stream));
+    cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
 #endif
 }
 

From 787e56ab5c478fa3289f89f7d086137451b55cb1 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Sat, 13 Apr 2013 20:49:52 +0400
Subject: [PATCH 48/49] disabled gpucodec module on MacOS

---
 modules/gpucodec/CMakeLists.txt |  2 +-
 samples/gpu/CMakeLists.txt      |  9 ++++++++-
 samples/gpu/video_reader.cpp    | 15 +++++++++++++++
 samples/gpu/video_writer.cpp    | 15 +++++++++++++++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/modules/gpucodec/CMakeLists.txt b/modules/gpucodec/CMakeLists.txt
index f03c201b3..17202bd0e 100644
--- a/modules/gpucodec/CMakeLists.txt
+++ b/modules/gpucodec/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(ANDROID OR IOS)
+if(ANDROID OR IOS OR APPLE)
   ocv_module_disable(gpucodec)
 endif()
 
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 1c6f6a963..a14c5907a 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                      opencv_ml opencv_video opencv_objdetect opencv_features2d
                                      opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                      opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
+                                     opencv_gpuarithm opencv_gpufilters opencv_gpuwarping opencv_gpuimgproc
                                      opencv_gpufeatures2d opencv_gpuoptflow opencv_gpubgsegm
                                      opencv_gpustereo opencv_gpulegacy)
 
@@ -24,6 +24,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/nonfree/include")
   endif()
 
+  if(HAVE_opencv_gpucodec)
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpucodec/include")
+  endif()
+
   if(HAVE_CUDA)
     ocv_include_directories(${CUDA_INCLUDE_DIRS})
   endif()
@@ -43,6 +47,9 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     if(HAVE_opencv_nonfree)
       target_link_libraries(${the_target} opencv_nonfree)
     endif()
+    if(HAVE_opencv_gpucodec)
+      target_link_libraries(${the_target} opencv_gpucodec)
+    endif()
 
     set_target_properties(${the_target} PROPERTIES
       OUTPUT_NAME "${project}-example-${name}"
diff --git a/samples/gpu/video_reader.cpp b/samples/gpu/video_reader.cpp
index f9bbbbb33..7eea72639 100644
--- a/samples/gpu/video_reader.cpp
+++ b/samples/gpu/video_reader.cpp
@@ -1,4 +1,9 @@
 #include <iostream>
+
+#include "opencv2/opencv_modules.hpp"
+
+#if defined(HAVE_OPENCV_GPUCODEC)
+
 #include <string>
 #include <vector>
 #include <algorithm>
@@ -69,3 +74,13 @@ int main(int argc, const char* argv[])
 
     return 0;
 }
+
+#else
+
+int main() // stub entry point compiled when HAVE_OPENCV_GPUCODEC is not defined
+{
+    std::cout << "OpenCV was built without GPU Video decoding support\n" << std::endl;
+    return 0;
+}
+
+#endif
diff --git a/samples/gpu/video_writer.cpp b/samples/gpu/video_writer.cpp
index aed76e04e..d540d0409 100644
--- a/samples/gpu/video_writer.cpp
+++ b/samples/gpu/video_writer.cpp
@@ -1,4 +1,9 @@
 #include <iostream>
+
+#include "opencv2/opencv_modules.hpp"
+
+#if defined(HAVE_OPENCV_GPUCODEC) && defined(WIN32)
+
 #include <vector>
 #include <numeric>
 
@@ -94,3 +99,13 @@ int main(int argc, const char* argv[])
 
     return 0;
 }
+
+#else
+
+int main() // stub entry point compiled unless both HAVE_OPENCV_GPUCODEC and WIN32 are defined
+{
+    std::cout << "OpenCV was built without GPU Video encoding support\n" << std::endl;
+    return 0;
+}
+
+#endif

From 564f60988529595d18673f9d1271d5bf295a5a39 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 18 Apr 2013 12:08:51 +0400
Subject: [PATCH 49/49] fixed samples compilation

---
 samples/cpp/CMakeLists.txt                   | 25 ++++++++++----------
 samples/gpu/bgfg_segm.cpp                    |  6 +++++
 samples/gpu/cascadeclassifier_nvidia_api.cpp |  1 +
 samples/gpu/hog.cpp                          |  1 +
 4 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt
index 97884825b..ab4a0d06c 100644
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@@ -16,16 +16,17 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
   ocv_include_directories("${OpenCV_SOURCE_DIR}/include")#for opencv.hpp
   ocv_include_modules(${OPENCV_CPP_SAMPLES_REQUIRED_DEPS})
 
-  if(HAVE_opencv_gpu)
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuarithm/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufilters/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuwarping/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufeatures2d/include")
+  if(HAVE_opencv_gpuoptflow)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuoptflow/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpubgsegm/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpustereo/include")
-    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
+  endif()
+  if(HAVE_opencv_gpuimgproc)
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuimgproc/include")
+  endif()
+  if(HAVE_opencv_gpuarithm)
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuarithm/include")
+  endif()
+  if(HAVE_opencv_gpufilters)
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufilters/include")
   endif()
 
   if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS)
@@ -51,8 +52,8 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     add_executable(${the_target} ${srcs})
     target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_CPP_SAMPLES_REQUIRED_DEPS})
 
-    if(HAVE_opencv_gpu)
-      target_link_libraries(${the_target} opencv_gpu)
+    if("${srcs}" MATCHES "gpu/")
+      target_link_libraries(${the_target} opencv_gpuarithm opencv_gpufilters)
     endif()
 
     set_target_properties(${the_target} PROPERTIES
@@ -78,7 +79,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_list_filterout(cpp_samples Qt_sample)
   endif()
 
-  if(NOT HAVE_opencv_gpu)
+  if(NOT HAVE_opencv_gpuarithm OR NOT HAVE_opencv_gpufilters)
     ocv_list_filterout(cpp_samples "/gpu/")
   endif()
 
diff --git a/samples/gpu/bgfg_segm.cpp b/samples/gpu/bgfg_segm.cpp
index 63552c250..4e8284835 100644
--- a/samples/gpu/bgfg_segm.cpp
+++ b/samples/gpu/bgfg_segm.cpp
@@ -6,6 +6,12 @@
 #include "opencv2/gpu.hpp"
 #include "opencv2/highgui.hpp"
 
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_NONFREE
+#  include "opencv2/nonfree/gpu.hpp"
+#endif
+
 using namespace std;
 using namespace cv;
 using namespace cv::gpu;
diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp
index f8dd60d76..3e9b668b6 100644
--- a/samples/gpu/cascadeclassifier_nvidia_api.cpp
+++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp
@@ -8,6 +8,7 @@
 #include <cstdio>
 #include "opencv2/gpu/gpu.hpp"
 #include "opencv2/highgui/highgui.hpp"
+#include "opencv2/objdetect/objdetect.hpp"
 #include "opencv2/objdetect/objdetect_c.h"
 
 #ifdef HAVE_CUDA
diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp
index 4e7ae5fe2..a6a1c0b2a 100644
--- a/samples/gpu/hog.cpp
+++ b/samples/gpu/hog.cpp
@@ -7,6 +7,7 @@
 #include <opencv2/core/utility.hpp>
 #include "opencv2/gpu.hpp"
 #include "opencv2/highgui.hpp"
+#include "opencv2/objdetect.hpp"
 
 using namespace std;
 using namespace cv;