From a3341006ee096df6d59b2734c3b8a1ca8df9cff8 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 11 Jun 2013 15:45:04 +0400
Subject: [PATCH] updated documentation

---
 modules/gpuarithm/doc/arithm.rst             | 105 +--
 modules/gpuarithm/doc/core.rst               | 156 +++--
 modules/gpuarithm/doc/element_operations.rst | 671 +++++++++----------
 modules/gpuarithm/doc/reductions.rst         | 164 +++--
 modules/gpufilters/doc/filtering.rst         |   4 +-
 5 files changed, 545 insertions(+), 555 deletions(-)

diff --git a/modules/gpuarithm/doc/arithm.rst b/modules/gpuarithm/doc/arithm.rst
index 8a051bc49..2f1d74df5 100644
--- a/modules/gpuarithm/doc/arithm.rst
+++ b/modules/gpuarithm/doc/arithm.rst
@@ -6,10 +6,10 @@ Arithm Operations on Matrices
 
 
 gpu::gemm
-------------------
+---------
 Performs generalized matrix multiplication.
 
-.. ocv:function:: void gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::gemm(InputArray src1, InputArray src2, double alpha, InputArray src3, double beta, OutputArray dst, int flags = 0, Stream& stream = Stream::Null())
 
     :param src1: First multiplied input matrix that should have  ``CV_32FC1`` , ``CV_64FC1`` , ``CV_32FC2`` , or  ``CV_64FC2``  type.
 
@@ -44,38 +44,40 @@ The function performs generalized matrix multiplication similar to the ``gemm``
 
 
 gpu::mulSpectrums
----------------------
+-----------------
 Performs a per-element multiplication of two Fourier spectrums.
 
-.. ocv:function:: void gpu::mulSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::mulSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, bool conjB=false, Stream& stream = Stream::Null())
 
-    :param a: First spectrum.
+    :param src1: First spectrum.
 
-    :param b: Second spectrum with the same size and type as  ``a`` .
+    :param src2: Second spectrum with the same size and type as ``src1`` .
 
-    :param c: Destination spectrum.
+    :param dst: Destination spectrum.
 
     :param flags: Mock parameter used for CPU/GPU interfaces similarity.
 
     :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
 
-    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
+    :param stream: Stream for the asynchronous version.
+
+Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
 
 .. seealso:: :ocv:func:`mulSpectrums`
 
 
 
 gpu::mulAndScaleSpectrums
------------------------------
+-------------------------
 Performs a per-element multiplication of two Fourier spectrums and scales the result.
 
-.. ocv:function:: void gpu::mulAndScaleSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null())
 
-    :param a: First spectrum.
+    :param src1: First spectrum.
 
-    :param b: Second spectrum with the same size and type as  ``a`` .
+    :param src2: Second spectrum with the same size and type as ``src1`` .
 
-    :param c: Destination spectrum.
+    :param dst: Destination spectrum.
 
     :param flags: Mock parameter used for CPU/GPU interfaces similarity.
 
@@ -83,17 +85,17 @@ Performs a per-element multiplication of two Fourier spectrums and scales the re
 
     :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication.
 
-    Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
+Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now.
 
 .. seealso:: :ocv:func:`mulSpectrums`
 
 
 
 gpu::dft
-------------
+--------
 Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix.
 
-.. ocv:function:: void gpu::dft( const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null())
 
     :param src: Source matrix (real or complex).
 
@@ -125,46 +127,25 @@ The source matrix should be continuous, otherwise reallocation and data copying
 
 
 
-gpu::ConvolveBuf
+gpu::Convolution
 ----------------
-.. ocv:struct:: gpu::ConvolveBuf
+.. ocv:class:: gpu::Convolution : public Algorithm
 
-Class providing a memory buffer for :ocv:func:`gpu::convolve` function, plus it allows to adjust some specific parameters. ::
+Base class for convolution (or cross-correlation) operator. ::
 
-    struct CV_EXPORTS ConvolveBuf
+    class CV_EXPORTS Convolution : public Algorithm
     {
-        Size result_size;
-        Size block_size;
-        Size user_block_size;
-        Size dft_size;
-        int spect_len;
-
-        GpuMat image_spect, templ_spect, result_spect;
-        GpuMat image_block, templ_block, result_data;
-
-        void create(Size image_size, Size templ_size);
-        static Size estimateBlockSize(Size result_size, Size templ_size);
+    public:
+        virtual void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) = 0;
     };
 
-You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::convolve` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
 
 
-
-gpu::ConvolveBuf::create
-------------------------
-.. ocv:function:: gpu::ConvolveBuf::create(Size image_size, Size templ_size)
-
-Constructs a buffer for :ocv:func:`gpu::convolve` function with respective arguments.
-
-
-
-gpu::convolve
------------------
+gpu::Convolution::convolve
+---------------------------
 Computes a convolution (or cross-correlation) of two images.
 
-.. ocv:function:: void gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr=false)
-
-.. ocv:function:: void gpu::convolve( const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::Convolution::convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null())
 
     :param image: Source image. Only  ``CV_32FC1`` images are supported for now.
 
@@ -174,38 +155,16 @@ Computes a convolution (or cross-correlation) of two images.
 
     :param ccorr: Flags to evaluate cross-correlation instead of convolution.
 
-    :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::ConvolveBuf`.
-
     :param stream: Stream for the asynchronous version.
 
 .. seealso:: :ocv:func:`gpu::filter2D`
 
 
 
-gpu::integral
------------------
-Computes an integral image.
+gpu::createConvolution
+----------------------
+Creates implementation for :ocv:class:`gpu::Convolution` .
 
-.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null())
+.. ocv:function:: Ptr<Convolution> gpu::createConvolution(Size user_block_size = Size())
 
-    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
-
-    :param sum: Integral image containing 32-bit unsigned integer values packed into  ``CV_32SC1`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`integral`
-
-
-
-gpu::sqrIntegral
---------------------
-Computes a squared integral image.
-
-.. ocv:function:: void gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null())
-
-    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
-
-    :param sqsum: Squared integral image containing 64-bit unsigned integer values packed into  ``CV_64FC1`` .
-
-    :param stream: Stream for the asynchronous version.
+    :param user_block_size: Block size. If you leave the default value `Size(0,0)`, automatic estimation of the block size is used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
diff --git a/modules/gpuarithm/doc/core.rst b/modules/gpuarithm/doc/core.rst
index 50599bcf2..624ea3e7b 100644
--- a/modules/gpuarithm/doc/core.rst
+++ b/modules/gpuarithm/doc/core.rst
@@ -6,12 +6,12 @@ Core Operations on Matrices
 
 
 gpu::merge
---------------
+----------
 Makes a multi-channel matrix out of several single-channel matrices.
 
-.. ocv:function:: void gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::merge(const std::vector<GpuMat>& src, OutputArray dst, Stream& stream = Stream::Null())
 
     :param src: Array/vector of source matrices.
 
@@ -26,12 +26,12 @@ Makes a multi-channel matrix out of several single-channel matrices.
 
 
 gpu::split
---------------
+----------
 Copies each plane of a multi-channel matrix into an array.
 
-.. ocv:function:: void gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::split(InputArray src, GpuMat* dst, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::split(InputArray src, std::vector<GpuMat>& dst, Stream& stream = Stream::Null())
 
     :param src: Source matrix.
 
@@ -43,15 +43,95 @@ Copies each plane of a multi-channel matrix into an array.
 
 
 
+gpu::transpose
+--------------
+Transposes a matrix.
+
+.. ocv:function:: void gpu::transpose(InputArray src1, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src1: Source matrix. 1-, 4-, 8-byte element sizes are supported for now.
+
+    :param dst: Destination matrix.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`transpose`
+
+
+
+gpu::flip
+---------
+Flips a 2D matrix around vertical, horizontal, or both axes.
+
+.. ocv:function:: void gpu::flip(InputArray src, OutputArray dst, int flipCode, Stream& stream = Stream::Null())
+
+    :param src: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth.
+
+    :param dst: Destination matrix.
+
+    :param flipCode: Flip mode for the source:
+
+        * ``0`` Flips around x-axis.
+
+        * ``> 0`` Flips around y-axis.
+
+        * ``< 0`` Flips around both axes.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`flip`
+
+
+
+gpu::LookUpTable
+----------------
+.. ocv:class:: gpu::LookUpTable : public Algorithm
+
+Base class for transform using lookup table. ::
+
+    class CV_EXPORTS LookUpTable : public Algorithm
+    {
+    public:
+        virtual void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0;
+    };
+
+.. seealso:: :ocv:func:`LUT`
+
+
+
+gpu::LookUpTable::transform
+---------------------------
+Transforms the source matrix into the destination matrix using the given look-up table: ``dst(I) = lut(src(I))`` .
+
+.. ocv:function:: void gpu::LookUpTable::transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.  ``CV_8UC1``  and  ``CV_8UC3``  matrices are supported for now.
+
+    :param dst: Destination matrix.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::createLookUpTable
+----------------------
+Creates implementation for :ocv:class:`gpu::LookUpTable` .
+
+.. ocv:function:: Ptr<LookUpTable> gpu::createLookUpTable(InputArray lut)
+
+    :param lut: Look-up table of 256 elements. It is a continuous ``CV_8U`` matrix.
+
+
+
 gpu::copyMakeBorder
 -----------------------
 Forms a border around an image.
 
-.. ocv:function:: void gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType, Scalar value = Scalar(), Stream& stream = Stream::Null())
 
-    :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and  ``CV_32FC1`` types are supported.
+    :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and ``CV_32FC1`` types are supported.
 
-    :param dst: Destination image with the same type as  ``src``. The size is  ``Size(src.cols+left+right, src.rows+top+bottom)`` .
+    :param dst: Destination image with the same type as  ``src``. The size is ``Size(src.cols+left+right, src.rows+top+bottom)`` .
 
     :param top:
 
@@ -68,61 +148,3 @@ Forms a border around an image.
     :param stream: Stream for the asynchronous version.
 
 .. seealso:: :ocv:func:`copyMakeBorder`
-
-
-
-gpu::transpose
-------------------
-Transposes a matrix.
-
-.. ocv:function:: void gpu::transpose( const GpuMat& src1, GpuMat& dst, Stream& stream=Stream::Null() )
-
-    :param src1: Source matrix. 1-, 4-, 8-byte element sizes are supported for now (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc).
-
-    :param dst: Destination matrix.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`transpose`
-
-
-
-gpu::flip
--------------
-Flips a 2D matrix around vertical, horizontal, or both axes.
-
-.. ocv:function:: void gpu::flip( const GpuMat& a, GpuMat& b, int flipCode, Stream& stream=Stream::Null() )
-
-    :param a: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth.
-
-    :param b: Destination matrix.
-
-    :param flipCode: Flip mode for the source:
-
-        * ``0`` Flips around x-axis.
-
-        * ``>0`` Flips around y-axis.
-
-        * ``<0`` Flips around both axes.
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`flip`
-
-
-
-gpu::LUT
-------------
-Transforms the source matrix into the destination matrix using the given look-up table: ``dst(I) = lut(src(I))``
-
-.. ocv:function:: void gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Source matrix.  ``CV_8UC1``  and  ``CV_8UC3``  matrices are supported for now.
-
-    :param lut: Look-up table of 256 elements. It is a continuous ``CV_8U`` matrix.
-
-    :param dst: Destination matrix with the same depth as  ``lut``  and the same number of channels as  ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`LUT`
diff --git a/modules/gpuarithm/doc/element_operations.rst b/modules/gpuarithm/doc/element_operations.rst
index eae2ad7a2..eb616c1c3 100644
--- a/modules/gpuarithm/doc/element_operations.rst
+++ b/modules/gpuarithm/doc/element_operations.rst
@@ -6,20 +6,16 @@ Per-element Operations
 
 
 gpu::add
-------------
+--------
 Computes a matrix-matrix or matrix-scalar sum.
 
-.. ocv:function:: void gpu::add( const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask=GpuMat(), int dtype=-1, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::add( const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask=GpuMat(), int dtype=-1, Stream& stream=Stream::Null() )
+    :param src1: First source matrix or scalar.
 
-    :param a: First source matrix.
+    :param src2: Second source matrix or scalar. Matrix should have the same size and type as ``src1`` .
 
-    :param b: Second source matrix to be added to ``a`` . Matrix should have the same size and type as ``a`` .
-
-    :param sc: A scalar to be added to ``a`` .
-
-    :param c: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``a`` depth.
+    :param dst: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``src1`` depth.
 
     :param mask: Optional operation mask, 8-bit single channel array, that specifies elements of the destination array to be changed.
 
@@ -32,20 +28,16 @@ Computes a matrix-matrix or matrix-scalar sum.
 
 
 gpu::subtract
------------------
+-------------
 Computes a matrix-matrix or matrix-scalar difference.
 
-.. ocv:function:: void gpu::subtract( const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask=GpuMat(), int dtype=-1, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::subtract( const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask=GpuMat(), int dtype=-1, Stream& stream=Stream::Null() )
+    :param src1: First source matrix or scalar.
 
-    :param a: First source matrix.
+    :param src2: Second source matrix or scalar. Matrix should have the same size and type as ``src1`` .
 
-    :param b: Second source matrix to be added to ``a`` . Matrix should have the same size and type as ``a`` .
-
-    :param sc: A scalar to be added to ``a`` .
-
-    :param c: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``a`` depth.
+    :param dst: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``src1`` depth.
 
     :param mask: Optional operation mask, 8-bit single channel array, that specifies elements of the destination array to be changed.
 
@@ -58,20 +50,16 @@ Computes a matrix-matrix or matrix-scalar difference.
 
 
 gpu::multiply
------------------
+-------------
 Computes a matrix-matrix or matrix-scalar per-element product.
 
-.. ocv:function:: void gpu::multiply( const GpuMat& a, const GpuMat& b, GpuMat& c, double scale=1, int dtype=-1, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::multiply( const GpuMat& a, const Scalar& sc, GpuMat& c, double scale=1, int dtype=-1, Stream& stream=Stream::Null() )
+    :param src1: First source matrix or scalar.
 
-    :param a: First source matrix.
+    :param src2: Second source matrix or scalar.
 
-    :param b: Second source matrix to be multiplied by ``a`` elements.
-
-    :param sc: A scalar to be multiplied by ``a`` elements.
-
-    :param c: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``a`` depth.
+    :param dst: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``src1`` depth.
 
     :param scale: Optional scale factor.
 
@@ -87,19 +75,15 @@ gpu::divide
 -----------
 Computes a matrix-matrix or matrix-scalar division.
 
-.. ocv:function:: void gpu::divide( const GpuMat& a, const GpuMat& b, GpuMat& c, double scale=1, int dtype=-1, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::divide(double src1, InputArray src2, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::divide( double scale, const GpuMat& b, GpuMat& c, int dtype=-1, Stream& stream=Stream::Null() )
+    :param src1: First source matrix or a scalar.
 
-    :param a: First source matrix or a scalar.
+    :param src2: Second source matrix or scalar.
 
-    :param b: Second source matrix. The ``a`` elements are divided by it.
-
-    :param sc: A scalar to be divided by the elements of ``a`` matrix.
-
-    :param c: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``a`` depth.
+    :param dst: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``src1`` depth.
 
     :param scale: Optional scale factor.
 
@@ -113,11 +97,296 @@ This function, in contrast to :ocv:func:`divide`, uses a round-down rounding mod
 
 
 
+gpu::absdiff
+------------
+Computes per-element absolute difference of two matrices (or of a matrix and scalar).
+
+.. ocv:function:: void gpu::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src1: First source matrix or scalar.
+
+    :param src2: Second source matrix or scalar.
+
+    :param dst: Destination matrix that has the same size and type as the input array(s).
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`absdiff`
+
+
+
+gpu::abs
+--------
+Computes an absolute value of each matrix element.
+
+.. ocv:function:: void gpu::abs(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`abs`
+
+
+
+gpu::sqr
+--------
+Computes a square value of each matrix element.
+
+.. ocv:function:: void gpu::sqr(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::sqrt
+---------
+Computes a square root of each matrix element.
+
+.. ocv:function:: void gpu::sqrt(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`sqrt`
+
+
+
+gpu::exp
+--------
+Computes an exponent of each matrix element.
+
+.. ocv:function:: void gpu::exp(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`exp`
+
+
+
+gpu::log
+--------
+Computes a natural logarithm of absolute value of each matrix element.
+
+.. ocv:function:: void gpu::log(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`log`
+
+
+
+gpu::pow
+--------
+Raises every matrix element to a power.
+
+.. ocv:function:: void gpu::pow(InputArray src, double power, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix.
+
+    :param power: Exponent of power.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+The function ``pow`` raises every element of the input matrix to ``power`` :
+
+.. math::
+
+    \texttt{dst} (I) =  \fork{\texttt{src}(I)^{\texttt{power}}}{if \texttt{power} is integer}{|\texttt{src}(I)|^{\texttt{power}}}{otherwise}
+
+.. seealso:: :ocv:func:`pow`
+
+
+
+gpu::compare
+------------
+Compares elements of two matrices (or of a matrix and scalar).
+
+.. ocv:function:: void gpu::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null())
+
+    :param src1: First source matrix or scalar.
+
+    :param src2: Second source matrix or scalar.
+
+    :param dst: Destination matrix that has the same size and type as the input array(s).
+
+    :param cmpop: Flag specifying the relation between the elements to be checked:
+
+            * **CMP_EQ:** ``src1(.) == src2(.)``
+            * **CMP_GT:** ``src1(.) > src2(.)``
+            * **CMP_GE:** ``src1(.) >= src2(.)``
+            * **CMP_LT:** ``src1(.) < src2(.)``
+            * **CMP_LE:** ``src1(.) <= src2(.)``
+            * **CMP_NE:** ``src1(.) != src2(.)``
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`compare`
+
+
+
+gpu::bitwise_not
+----------------
+Performs a per-element bitwise inversion.
+
+.. ocv:function:: void gpu::bitwise_not(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null())
+
+    :param src: Source matrix.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param mask: Optional operation mask. 8-bit single channel image.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::bitwise_or
+---------------
+Performs a per-element bitwise disjunction of two matrices (or of matrix and scalar).
+
+.. ocv:function:: void gpu::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null())
+
+    :param src1: First source matrix or scalar.
+
+    :param src2: Second source matrix or scalar.
+
+    :param dst: Destination matrix that has the same size and type as the input array(s).
+
+    :param mask: Optional operation mask. 8-bit single channel image.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::bitwise_and
+----------------
+Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar).
+
+.. ocv:function:: void gpu::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null())
+
+    :param src1: First source matrix or scalar.
+
+    :param src2: Second source matrix or scalar.
+
+    :param dst: Destination matrix that has the same size and type as the input array(s).
+
+    :param mask: Optional operation mask. 8-bit single channel image.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::bitwise_xor
+----------------
+Performs a per-element bitwise ``exclusive or`` operation of two matrices (or of matrix and scalar).
+
+.. ocv:function:: void gpu::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null())
+
+    :param src1: First source matrix or scalar.
+
+    :param src2: Second source matrix or scalar.
+
+    :param dst: Destination matrix that has the same size and type as the input array(s).
+
+    :param mask: Optional operation mask. 8-bit single channel image.
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::rshift
+-----------
+Performs pixel by pixel right shift of an image by a constant value.
+
+.. ocv:function:: void gpu::rshift(InputArray src, Scalar_<int> val, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix. Supports 1, 3 and 4 channels images with integer elements.
+
+    :param val: Constant values, one per channel.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::lshift
+-----------
+Performs pixel by pixel left shift of an image by a constant value.
+
+.. ocv:function:: void gpu::lshift(InputArray src, Scalar_<int> val, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32S`` depth.
+
+    :param val: Constant values, one per channel.
+
+    :param dst: Destination matrix with the same size and type as ``src`` .
+
+    :param stream: Stream for the asynchronous version.
+
+
+
+gpu::min
+--------
+Computes the per-element minimum of two matrices (or a matrix and a scalar).
+
+.. ocv:function:: void gpu::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src1: First source matrix or scalar.
+
+    :param src2: Second source matrix or scalar.
+
+    :param dst: Destination matrix that has the same size and type as the input array(s).
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`min`
+
+
+
+gpu::max
+--------
+Computes the per-element maximum of two matrices (or a matrix and a scalar).
+
+.. ocv:function:: void gpu::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null())
+
+    :param src1: First source matrix or scalar.
+
+    :param src2: Second source matrix or scalar.
+
+    :param dst: Destination matrix that has the same size and type as the input array(s).
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`max`
+
+
+
 gpu::addWeighted
 ----------------
 Computes the weighted sum of two arrays.
 
-.. ocv:function:: void gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::addWeighted(InputArray src1, double alpha, InputArray src2, double beta, double gamma, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null())
 
     :param src1: First source array.
 
@@ -147,311 +416,11 @@ where ``I`` is a multi-dimensional index of array elements. In case of multi-cha
 
 
 
-gpu::abs
-------------
-Computes an absolute value of each matrix element.
-
-.. ocv:function:: void gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Source matrix. Supports ``CV_16S`` and ``CV_32F`` depth.
-
-    :param dst: Destination matrix with the same size and type as ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`abs`
-
-
-
-gpu::sqr
-------------
-Computes a square value of each matrix element.
-
-.. ocv:function:: void gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
-
-    :param dst: Destination matrix with the same size and type as ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::sqrt
-------------
-Computes a square root of each matrix element.
-
-.. ocv:function:: void gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
-
-    :param dst: Destination matrix with the same size and type as ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`sqrt`
-
-
-
-gpu::exp
-------------
-Computes an exponent of each matrix element.
-
-.. ocv:function:: void gpu::exp( const GpuMat& a, GpuMat& b, Stream& stream=Stream::Null() )
-
-    :param a: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
-
-    :param b: Destination matrix with the same size and type as ``a`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`exp`
-
-
-
-gpu::log
-------------
-Computes a natural logarithm of absolute value of each matrix element.
-
-.. ocv:function:: void gpu::log( const GpuMat& a, GpuMat& b, Stream& stream=Stream::Null() )
-
-    :param a: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
-
-    :param b: Destination matrix with the same size and type as ``a`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`log`
-
-
-
-gpu::pow
-------------
-Raises every matrix element to a power.
-
-.. ocv:function:: void gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src: Source matrix. Supports all type, except ``CV_64F`` depth.
-
-    :param power: Exponent of power.
-
-    :param dst: Destination matrix with the same size and type as ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-The function ``pow`` raises every element of the input matrix to ``p`` :
-
-.. math::
-
-    \texttt{dst} (I) =  \fork{\texttt{src}(I)^p}{if \texttt{p} is integer}{|\texttt{src}(I)|^p}{otherwise}
-
-.. seealso:: :ocv:func:`pow`
-
-
-
-gpu::absdiff
-----------------
-Computes per-element absolute difference of two matrices (or of a matrix and scalar).
-
-.. ocv:function:: void gpu::absdiff( const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream=Stream::Null() )
-
-.. ocv:function:: void gpu::absdiff( const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream=Stream::Null() )
-
-    :param a: First source matrix.
-
-    :param b: Second source matrix to be added to ``a`` .
-
-    :param s: A scalar to be added to ``a`` .
-
-    :param c: Destination matrix with the same size and type as ``a`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`absdiff`
-
-
-
-gpu::compare
-----------------
-Compares elements of two matrices.
-
-.. ocv:function:: void gpu::compare( const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream=Stream::Null() )
-
-.. ocv:function:: void gpu::compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null())
-
-    :param a: First source matrix.
-
-    :param b: Second source matrix with the same size and type as ``a`` .
-
-    :param sc: A scalar to be compared with ``a`` .
-
-    :param c: Destination matrix with the same size as ``a`` and the ``CV_8UC1`` type.
-
-    :param cmpop: Flag specifying the relation between the elements to be checked:
-
-            * **CMP_EQ:** ``a(.) == b(.)``
-            * **CMP_GT:** ``a(.) < b(.)``
-            * **CMP_GE:** ``a(.) <= b(.)``
-            * **CMP_LT:** ``a(.) < b(.)``
-            * **CMP_LE:** ``a(.) <= b(.)``
-            * **CMP_NE:** ``a(.) != b(.)``
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`compare`
-
-
-
-gpu::bitwise_not
---------------------
-Performs a per-element bitwise inversion.
-
-.. ocv:function:: void gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
-
-    :param src: Source matrix.
-
-    :param dst: Destination matrix with the same size and type as ``src`` .
-
-    :param mask: Optional operation mask. 8-bit single channel image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::bitwise_or
--------------------
-Performs a per-element bitwise disjunction of two matrices or of matrix and scalar.
-
-.. ocv:function:: void gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
-.. ocv:function:: void gpu::bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src1: First source matrix.
-
-    :param src2: Second source matrix with the same size and type as ``src1`` .
-
-    :param dst: Destination matrix with the same size and type as ``src1`` .
-
-    :param mask: Optional operation mask. 8-bit single channel image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::bitwise_and
---------------------
-Performs a per-element bitwise conjunction of two matrices or of matrix and scalar.
-
-.. ocv:function:: void gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
-.. ocv:function:: void gpu::bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src1: First source matrix.
-
-    :param src2: Second source matrix with the same size and type as ``src1`` .
-
-    :param dst: Destination matrix with the same size and type as ``src1`` .
-
-    :param mask: Optional operation mask. 8-bit single channel image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::bitwise_xor
---------------------
-Performs a per-element bitwise ``exclusive or`` operation of two matrices of matrix and scalar.
-
-.. ocv:function:: void gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
-.. ocv:function:: void gpu::bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src1: First source matrix.
-
-    :param src2: Second source matrix with the same size and type as ``src1`` .
-
-    :param dst: Destination matrix with the same size and type as ``src1`` .
-
-    :param mask: Optional operation mask. 8-bit single channel image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::rshift
---------------------
-Performs pixel by pixel right shift of an image by a constant value.
-
-.. ocv:function:: void gpu::rshift( const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream=Stream::Null() )
-
-    :param src: Source matrix. Supports 1, 3 and 4 channels images with integers elements.
-
-    :param sc: Constant values, one per channel.
-
-    :param dst: Destination matrix with the same size and type as ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::lshift
---------------------
-Performs pixel by pixel right left of an image by a constant value.
-
-.. ocv:function:: void gpu::lshift( const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream=Stream::Null() )
-
-    :param src: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32S`` depth.
-
-    :param sc: Constant values, one per channel.
-
-    :param dst: Destination matrix with the same size and type as ``src`` .
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::min
-------------
-Computes the per-element minimum of two matrices (or a matrix and a scalar).
-
-.. ocv:function:: void gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src1: First source matrix.
-
-    :param src2: Second source matrix or a scalar to compare ``src1`` elements with.
-
-    :param dst: Destination matrix with the same size and type as ``src1`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`min`
-
-
-
-gpu::max
-------------
-Computes the per-element maximum of two matrices (or a matrix and a scalar).
-
-.. ocv:function:: void gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null())
-
-.. ocv:function:: void gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null())
-
-    :param src1: First source matrix.
-
-    :param src2: Second source matrix or a scalar to compare ``src1`` elements with.
-
-    :param dst: Destination matrix with the same size and type as ``src1`` .
-
-    :param stream: Stream for the asynchronous version.
-
-.. seealso:: :ocv:func:`max`
-
-
-
 gpu::threshold
-------------------
+--------------
 Applies a fixed-level threshold to each array element.
 
-.. ocv:function:: double gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null())
+.. ocv:function:: double gpu::threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type, Stream& stream = Stream::Null())
 
     :param src: Source array (single-channel).
 
@@ -470,12 +439,12 @@ Applies a fixed-level threshold to each array element.
 
 
 gpu::magnitude
-------------------
+--------------
 Computes magnitudes of complex matrix elements.
 
-.. ocv:function:: void gpu::magnitude( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::magnitude(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::magnitude(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null())
 
     :param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
 
@@ -492,12 +461,12 @@ Computes magnitudes of complex matrix elements.
 
 
 gpu::magnitudeSqr
----------------------
+-----------------
 Computes squared magnitudes of complex matrix elements.
 
-.. ocv:function:: void gpu::magnitudeSqr( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
+.. ocv:function:: void gpu::magnitudeSqr(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null())
 
-.. ocv:function:: void gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null())
 
     :param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
 
@@ -512,10 +481,10 @@ Computes squared magnitudes of complex matrix elements.
 
 
 gpu::phase
---------------
+----------
 Computes polar angles of complex matrix elements.
 
-.. ocv:function:: void gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null())
 
     :param x: Source matrix containing real components ( ``CV_32FC1`` ).
 
@@ -532,10 +501,10 @@ Computes polar angles of complex matrix elements.
 
 
 gpu::cartToPolar
---------------------
+----------------
 Converts Cartesian coordinates into polar.
 
-.. ocv:function:: void gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null())
 
     :param x: Source matrix containing real components ( ``CV_32FC1`` ).
 
@@ -554,10 +523,10 @@ Converts Cartesian coordinates into polar.
 
 
 gpu::polarToCart
---------------------
+----------------
 Converts polar coordinates into Cartesian.
 
-.. ocv:function:: void gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees=false, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null())
 
     :param magnitude: Source matrix containing magnitudes ( ``CV_32FC1`` ).
 
diff --git a/modules/gpuarithm/doc/reductions.rst b/modules/gpuarithm/doc/reductions.rst
index 938efc35b..b34c2d860 100644
--- a/modules/gpuarithm/doc/reductions.rst
+++ b/modules/gpuarithm/doc/reductions.rst
@@ -6,16 +6,16 @@ Matrix Reductions
 
 
 gpu::norm
--------------
+---------
 Returns the norm of a matrix (or difference of two matrices).
 
-.. ocv:function:: double gpu::norm(const GpuMat& src1, int normType=NORM_L2)
+.. ocv:function:: double gpu::norm(InputArray src1, int normType)
 
-.. ocv:function:: double gpu::norm(const GpuMat& src1, int normType, GpuMat& buf)
+.. ocv:function:: double gpu::norm(InputArray src1, int normType, GpuMat& buf)
 
-.. ocv:function:: double gpu::norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf)
+.. ocv:function:: double gpu::norm(InputArray src1, int normType, InputArray mask, GpuMat& buf)
 
-.. ocv:function:: double gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2)
+.. ocv:function:: double gpu::norm(InputArray src1, InputArray src2, int normType=NORM_L2)
 
     :param src1: Source matrix. Any matrices except 64F are supported.
 
@@ -32,14 +32,14 @@ Returns the norm of a matrix (or difference of two matrices).
 
 
 gpu::sum
-------------
+--------
 Returns the sum of matrix elements.
 
-.. ocv:function:: Scalar gpu::sum(const GpuMat& src)
+.. ocv:function:: Scalar gpu::sum(InputArray src)
 
-.. ocv:function:: Scalar gpu::sum(const GpuMat& src, GpuMat& buf)
+.. ocv:function:: Scalar gpu::sum(InputArray src, GpuMat& buf)
 
-.. ocv:function:: Scalar gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
+.. ocv:function:: Scalar gpu::sum(InputArray src, InputArray mask, GpuMat& buf)
 
     :param src: Source image of any depth except for ``CV_64F`` .
 
@@ -52,14 +52,14 @@ Returns the sum of matrix elements.
 
 
 gpu::absSum
----------------
+-----------
 Returns the sum of absolute values for matrix elements.
 
-.. ocv:function:: Scalar gpu::absSum(const GpuMat& src)
+.. ocv:function:: Scalar gpu::absSum(InputArray src)
 
-.. ocv:function:: Scalar gpu::absSum(const GpuMat& src, GpuMat& buf)
+.. ocv:function:: Scalar gpu::absSum(InputArray src, GpuMat& buf)
 
-.. ocv:function:: Scalar gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
+.. ocv:function:: Scalar gpu::absSum(InputArray src, InputArray mask, GpuMat& buf)
 
     :param src: Source image of any depth except for ``CV_64F`` .
 
@@ -70,14 +70,14 @@ Returns the sum of absolute values for matrix elements.
 
 
 gpu::sqrSum
----------------
+-----------
 Returns the squared sum of matrix elements.
 
-.. ocv:function:: Scalar gpu::sqrSum(const GpuMat& src)
+.. ocv:function:: Scalar gpu::sqrSum(InputArray src)
 
-.. ocv:function:: Scalar gpu::sqrSum(const GpuMat& src, GpuMat& buf)
+.. ocv:function:: Scalar gpu::sqrSum(InputArray src, GpuMat& buf)
 
-.. ocv:function:: Scalar gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf)
+.. ocv:function:: Scalar gpu::sqrSum(InputArray src, InputArray mask, GpuMat& buf)
 
     :param src: Source image of any depth except for ``CV_64F`` .
 
@@ -88,12 +88,12 @@ Returns the squared sum of matrix elements.
 
 
 gpu::minMax
----------------
+-----------
 Finds global minimum and maximum matrix elements and returns their values.
 
-.. ocv:function:: void gpu::minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat())
+.. ocv:function:: void gpu::minMax(InputArray src, double* minVal, double* maxVal=0, InputArray mask=noArray())
 
-.. ocv:function:: void gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
+.. ocv:function:: void gpu::minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf)
 
     :param src: Single-channel source image.
 
@@ -112,12 +112,12 @@ The function does not work with ``CV_64F`` images on GPUs with the compute capab
 
 
 gpu::minMaxLoc
-------------------
+--------------
 Finds global minimum and maximum matrix elements and returns their values with locations.
 
-.. ocv:function:: void gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, const GpuMat& mask=GpuMat())
+.. ocv:function:: void gpu::minMaxLoc(InputArray src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, InputArray mask=noArray())
 
-.. ocv:function:: void gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf)
+.. ocv:function:: void gpu::minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray mask, GpuMat& valbuf, GpuMat& locbuf)
 
     :param src: Single-channel source image.
 
@@ -142,12 +142,12 @@ Finds global minimum and maximum matrix elements and returns their values with l
 
 
 gpu::countNonZero
----------------------
+-----------------
 Counts non-zero matrix elements.
 
-.. ocv:function:: int gpu::countNonZero(const GpuMat& src)
+.. ocv:function:: int gpu::countNonZero(InputArray src)
 
-.. ocv:function:: int gpu::countNonZero(const GpuMat& src, GpuMat& buf)
+.. ocv:function:: int gpu::countNonZero(InputArray src, GpuMat& buf)
 
     :param src: Single-channel source image.
 
@@ -163,7 +163,7 @@ gpu::reduce
 -----------
 Reduces a matrix to a vector.
 
-.. ocv:function:: void gpu::reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null())
 
     :param mtx: Source 2D matrix.
 
@@ -183,48 +183,20 @@ Reduces a matrix to a vector.
 
     :param dtype: When it is negative, the destination vector will have the same type as the source matrix. Otherwise, its type will be  ``CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channels())`` .
 
+    :param stream: Stream for the asynchronous version.
+
 The function ``reduce`` reduces the matrix to a vector by treating the matrix rows/columns as a set of 1D vectors and performing the specified operation on the vectors until a single row/column is obtained. For example, the function can be used to compute horizontal and vertical projections of a raster image. In case of ``CV_REDUCE_SUM`` and ``CV_REDUCE_AVG`` , the output may have a larger element bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction modes.
 
 .. seealso:: :ocv:func:`reduce`
 
 
 
-gpu::normalize
---------------
-Normalizes the norm or value range of an array.
-
-.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat())
-
-.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
-
-    :param src: input array.
-
-    :param dst: output array of the same size as  ``src`` .
-
-    :param alpha: norm value to normalize to or the lower range boundary in case of the range normalization.
-
-    :param beta: upper range boundary in case of the range normalization; it is not used for the norm normalization.
-
-    :param normType: normalization type (see the details below).
-
-    :param dtype: when negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as  ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``.
-
-    :param mask: optional operation mask.
-
-    :param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
-
-    :param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
-
-.. seealso:: :ocv:func:`normalize`
-
-
-
 gpu::meanStdDev
--------------------
+---------------
 Computes a mean value and a standard deviation of matrix elements.
 
-.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev)
-.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf)
+.. ocv:function:: void gpu::meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev)
+.. ocv:function:: void gpu::meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf)
 
     :param mtx: Source matrix.  ``CV_8UC1``  matrices are supported for now.
 
@@ -239,10 +211,10 @@ Computes a mean value and a standard deviation of matrix elements.
 
 
 gpu::rectStdDev
--------------------
+---------------
 Computes a standard deviation of integral images.
 
-.. ocv:function:: void gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null())
+.. ocv:function:: void gpu::rectStdDev(InputArray src, InputArray sqr, OutputArray dst, Rect rect, Stream& stream = Stream::Null())
 
     :param src: Source image. Only the ``CV_32SC1`` type is supported.
 
@@ -253,3 +225,71 @@ Computes a standard deviation of integral images.
     :param rect: Rectangular window.
 
     :param stream: Stream for the asynchronous version.
+
+
+
+gpu::normalize
+--------------
+Normalizes the norm or value range of an array.
+
+.. ocv:function:: void gpu::normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray())
+
+.. ocv:function:: void gpu::normalize(InputArray src, OutputArray dst, double alpha, double beta, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf)
+
+    :param src: Input array.
+
+    :param dst: Output array of the same size as  ``src`` .
+
+    :param alpha: Norm value to normalize to or the lower range boundary in case of the range normalization.
+
+    :param beta: Upper range boundary in case of the range normalization; it is not used for the norm normalization.
+
+    :param norm_type: Normalization type ( ``NORM_MINMAX`` , ``NORM_L2`` , ``NORM_L1`` or ``NORM_INF`` ).
+
+    :param dtype: When negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as  ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``.
+
+    :param mask: Optional operation mask.
+
+    :param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+    :param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+.. seealso:: :ocv:func:`normalize`
+
+
+
+gpu::integral
+-------------
+Computes an integral image.
+
+.. ocv:function:: void gpu::integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null())
+
+    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
+
+    :param sum: Integral image containing 32-bit unsigned integer values packed into  ``CV_32SC1`` .
+
+    :param buffer: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+    :param stream: Stream for the asynchronous version.
+
+.. seealso:: :ocv:func:`integral`
+
+
+
+gpu::sqrIntegral
+----------------
+Computes a squared integral image.
+
+.. ocv:function:: void gpu::sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null())
+
+.. ocv:function:: void gpu::sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null())
+
+    :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
+
+    :param sqsum: Squared integral image containing 64-bit unsigned integer values packed into  ``CV_64FC1`` .
+
+    :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
+
+    :param stream: Stream for the asynchronous version.
diff --git a/modules/gpufilters/doc/filtering.rst b/modules/gpufilters/doc/filtering.rst
index 348a42510..79c2ea51c 100644
--- a/modules/gpufilters/doc/filtering.rst
+++ b/modules/gpufilters/doc/filtering.rst
@@ -381,7 +381,7 @@ Creates a non-separable linear filter.
 
     :param dstType: Output image type. The same type as ``src`` is supported.
 
-    :param kernel: 2D array of filter coefficients. Floating-point coefficients will be converted to fixed-point representation before the actual processing. Supports size up to 16. For larger kernels use :ocv:func:`gpu::convolve`.
+    :param kernel: 2D array of filter coefficients. Floating-point coefficients will be converted to fixed-point representation before the actual processing. Supports size up to 16. For larger kernels use :ocv:class:`gpu::Convolution`.
 
     :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center.
 
@@ -411,7 +411,7 @@ Applies the non-separable 2D linear filter to an image.
 
     :param stream: Stream for the asynchronous version.
 
-.. seealso:: :ocv:func:`filter2D`, :ocv:func:`gpu::convolve`
+.. seealso:: :ocv:func:`filter2D`, :ocv:class:`gpu::Convolution`