Merge branch '2.4'

2013-02-22 17:33:30 +04:00 · 2013-02-22 17:33:30 +04:00 · 0b8a6da817
commit 0b8a6da817
parent 0ccdc5b4af e6dd4e840d
47 changed files with 669 additions and 376 deletions
--- a/3rdparty/ffmpeg/opencv_ffmpeg.dll
+++ b/3rdparty/ffmpeg/opencv_ffmpeg.dll
--- a/3rdparty/ffmpeg/opencv_ffmpeg_64.dll
+++ b/3rdparty/ffmpeg/opencv_ffmpeg_64.dll
--- a/android/service/doc/BaseLoaderCallback.rst
+++ b/android/service/doc/BaseLoaderCallback.rst
@ -48,7 +48,7 @@ See the "15-puzzle" OpenCV sample for details.
        super.onResume();
        Log.i(TAG, "Trying to load OpenCV library");
-        if (!OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mOpenCVCallBack))
+        if (!OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_4, this, mOpenCVCallBack))
        {
            Log.e(TAG, "Cannot connect to OpenCV Manager");
        }
--- a/android/service/doc/JavaHelper.rst
+++ b/android/service/doc/JavaHelper.rst
@ -47,3 +47,7 @@ OpenCV version constants
 .. data:: OPENCV_VERSION_2_4_3
    OpenCV Library version 2.4.3
 .. data:: OPENCV_VERSION_2_4_4
    OpenCV Library version 2.4.4
--- a/android/service/engine/AndroidManifest.xml
+++ b/android/service/engine/AndroidManifest.xml
@ -1,8 +1,8 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="org.opencv.engine"
-    android:versionCode="25@ANDROID_PLATFORM_VERSION_CODE@"
+    android:versionCode="26@ANDROID_PLATFORM_VERSION_CODE@"
-    android:versionName="2.5" >
+    android:versionName="2.6" >
    <uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" />
    <uses-feature android:name="android.hardware.touchscreen" android:required="false"/>
--- a/android/service/readme.txt
+++ b/android/service/readme.txt
@ -14,20 +14,20 @@ manually using adb tool:
 .. code-block:: sh
-    adb install OpenCV-2.4.3-android-sdk/apk/OpenCV_2.4.3.2_Manager_2.4_<platform>.apk
+    adb install OpenCV-2.4.4-android-sdk/apk/OpenCV_2.4.4_Manager_2.6_<platform>.apk
 Use the table below to determine proper OpenCV Manager package for your device:
-+------------------------------+--------------+-----------------------------------------------------+
+------------------------------+--------------+---------------------------------------------------+
 | Hardware Platform            | Android ver. | Package name                                      |
-+==============================+==============+=====================================================+
+==============================+==============+===================================================+
-| armeabi-v7a (ARMv7-A + NEON) |    >= 2.3    | OpenCV_2.4.3.2_Manager_2.4_armv7a-neon.apk          |
+| armeabi-v7a (ARMv7-A + NEON) |    >= 2.3    | OpenCV_2.4.4_Manager_2.6_armv7a-neon.apk          |
-+------------------------------+--------------+-----------------------------------------------------+
+------------------------------+--------------+---------------------------------------------------+
-| armeabi-v7a (ARMv7-A + NEON) |     = 2.2    | OpenCV_2.4.3.2_Manager_2.4_armv7a-neon-android8.apk |
+| armeabi-v7a (ARMv7-A + NEON) |     = 2.2    | OpenCV_2.4.4_Manager_2.6_armv7a-neon-android8.apk |
-+------------------------------+--------------+-----------------------------------------------------+
+------------------------------+--------------+---------------------------------------------------+
-| armeabi (ARMv5, ARMv6)       |    >= 2.3    | OpenCV_2.4.3.2_Manager_2.4_armeabi.apk              |
+| armeabi (ARMv5, ARMv6)       |    >= 2.3    | OpenCV_2.4.4_Manager_2.6_armeabi.apk              |
-+------------------------------+--------------+-----------------------------------------------------+
+------------------------------+--------------+---------------------------------------------------+
-| Intel x86                    |    >= 2.3    | OpenCV_2.4.3.2_Manager_2.4_x86.apk                  |
+| Intel x86                    |    >= 2.3    | OpenCV_2.4.4_Manager_2.6_x86.apk                  |
-+------------------------------+--------------+-----------------------------------------------------+
+------------------------------+--------------+---------------------------------------------------+
-| MIPS                         |    >= 2.3    | OpenCV_2.4.3.2_Manager_2.4_mips.apk                 |
+| MIPS                         |    >= 2.3    | OpenCV_2.4.4_Manager_2.6_mips.apk                 |
-+------------------------------+--------------+-----------------------------------------------------+
+------------------------------+--------------+---------------------------------------------------+
--- a/cmake/OpenCVDetectOpenCL.cmake
+++ b/cmake/OpenCVDetectOpenCL.cmake
@ -81,6 +81,7 @@ else()
    set(ENV_AMDAPPSDKROOT $ENV{AMDAPPSDKROOT})
    set(ENV_OPENCLROOT $ENV{OPENCLROOT})
    set(ENV_CUDA_PATH $ENV{CUDA_PATH})
    set(ENV_INTELOCLSDKROOT $ENV{INTELOCLSDKROOT})
    if(ENV_AMDSTREAMSDKROOT)
      set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_AMDAPPSDKROOT}/include)
      if(CMAKE_SIZEOF_VOID_P EQUAL 4)
@ -109,6 +110,13 @@ else()
      else()
        set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} /usr/lib64)
      endif()
    elseif(ENV_INTELOCLSDKROOT)
      set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_INTELOCLSDKROOT}/include)
      if(CMAKE_SIZEOF_VOID_P EQUAL 4)
        set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_INTELOCLSDKROOT}/lib/x86)
      else()
        set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_INTELOCLSDKROOT}/lib/x64)
      endif()
    endif()
    if(OPENCL_INCLUDE_SEARCH_PATH)
--- a/cmake/OpenCVGenAndroidMK.cmake
+++ b/cmake/OpenCVGenAndroidMK.cmake
@ -20,7 +20,7 @@ if(ANDROID)
  endif()
  # setup lists of camera libs
-  foreach(abi ARMEABI ARMEABI_V7A X86)
+  foreach(abi ARMEABI ARMEABI_V7A X86 MIPS)
    ANDROID_GET_ABI_RAWNAME(${abi} ndkabi)
    if(BUILD_ANDROID_CAMERA_WRAPPER)
      if(ndkabi STREQUAL ANDROID_NDK_ABI_NAME)
--- a/cmake/templates/OpenCV.mk.in
+++ b/cmake/templates/OpenCV.mk.in
@ -57,6 +57,9 @@ ifeq (${OPENCV_CAMERA_MODULES},on)
    ifeq ($(TARGET_ARCH_ABI),x86)
        OPENCV_CAMERA_MODULES:=@OPENCV_CAMERA_LIBS_X86_CONFIGCMAKE@
    endif
    ifeq ($(TARGET_ARCH_ABI),mips)
        OPENCV_CAMERA_MODULES:=@OPENCV_CAMERA_LIBS_MIPS_CONFIGCMAKE@
    endif
 else
    OPENCV_CAMERA_MODULES:=
 endif
--- a/doc/tutorials/definitions/tocDefinitions.rst
+++ b/doc/tutorials/definitions/tocDefinitions.rst
@ -10,3 +10,4 @@
 .. |Author_AlexB| unicode:: Alexandre U+0020 Benoit
 .. |Author_EricCh| unicode:: Eric U+0020 Christiansen
 .. |Author_AndreyP| unicode:: Andrey U+0020 Pavlenko
 .. |Author_AlexS| unicode:: Alexander U+0020 Smorkalov
--- a/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.rst
+++ b/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.rst
@ -0,0 +1,115 @@
 .. _ARM-Linux-cross-compile:
 Cross compilation for ARM based Linux systems
 *********************************************
 These steps were tested on Ubuntu Linux 12.04, but should work for other Linux distributions.
 In case of other distributions, package names and names of cross compilation tools may differ.
 There are several popular EABI versions that are used on ARM platform. This tutorial is
 written for *gnueabi* and *gnueabihf*, but other variants should work with minimal changes.
 Prerequisites
 =============
  * Host computer with Linux;
  * Git;
  * CMake 2.6 or higher;
  * Cross compilation tools for ARM: gcc, libstdc++, etc. Depending on target platform you need
    to choose *gnueabi* or *gnueabihf* tools.
    Install command for *gnueabi*:
    .. code-block:: bash
       sudo apt-get install gcc-arm-linux-gnueabi
    Install command for *gnueabihf*:
    .. code-block:: bash
       sudo apt-get install gcc-arm-linux-gnueabihf
  * pkgconfig;
  * Python 2.6 for host system;
  * [optional] ffmpeg or libav development packages for armeabi(hf): libavcodec-dev, libavformat-dev, libswscale-dev;
  * [optional] GTK+2.x or higher, including headers (libgtk2.0-dev) for armeabi(hf);
  * [optional] libdc1394 2.x;
  * [optional] libjpeg-dev, libpng-dev, libtiff-dev, libjasper-dev for armeabi(hf).
 Getting OpenCV Source Code
 ==========================
 You can use the latest stable OpenCV version available in *sourceforge* or you can grab the latest
 snapshot from our `Git repository <https://github.com/Itseez/opencv.git>`_.
 Getting the Latest Stable OpenCV Version
 ----------------------------------------
 * Go to our `page on Sourceforge <http://sourceforge.net/projects/opencvlibrary>`_;
 * Download the source tarball and unpack it.
 Getting the Cutting-edge OpenCV from the Git Repository
 -------------------------------------------------------
 Launch Git client and clone `OpenCV repository <http://github.com/itseez/opencv>`_
 In Linux it can be achieved with the following command in Terminal:
 .. code-block:: bash
   cd ~/<my_working_directory>
   git clone https://github.com/Itseez/opencv.git
 Building OpenCV
 ===============
 #. Create a build directory, make it current and run the following command:
   .. code-block:: bash
      cmake [<some optional parameters>] -DCMAKE_TOOLCHAIN_FILE=<path to the OpenCV source directory>/platforms/linux/arm-gnueabi.toolchain.cmake <path to the OpenCV source directory>
   Toolchain uses *gnueabihf* EABI convention by default. Add ``-DSOFTFP=ON`` cmake argument to switch on softfp compiler.
   .. code-block:: bash
      cmake [<some optional parameters>] -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=<path to the OpenCV source directory>/platforms/linux/arm-gnueabi.toolchain.cmake <path to the OpenCV source directory>
   For example:
   .. code-block:: bash
      cd ~/opencv/platforms/linux
      mkdir -p build_hardfp
      cd build_hardfp
      cmake -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake ../../..
 #. Run make in build (<cmake_binary_dir>) directory:
   .. code-block:: bash
      make
 .. note::
   Optionally you can strip symbols info from the created library via install/strip make target.
   This option produces smaller binary (~ twice smaller) but makes further debugging harder.
 Enable hardware optimizations
 -----------------------------
 Depending on target platform architecture, different instruction sets can be used. By default
 compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
 to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using
 NEON SIMD extensions.
 TBB is supported on multi core ARM SoCs also.
 Add ``-DWITH_TBB=ON`` and ``-DBUILD_TBB=ON`` to enable it. Cmake scripts download TBB sources
 from official project site `<http://threadingbuildingblocks.org/>`_ and build it.
--- a/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.rst
+++ b/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.rst
@ -3,7 +3,9 @@
 Introduction to OpenCV
 -----------------------------------------------------------
-Here you can read tutorials about how to set up your computer to work with the OpenCV library. Additionaly you can find a few very basic sample source code that will let introduce you to the world of the OpenCV.
+Here you can read tutorials about how to set up your computer to work with the OpenCV library.
 Additionally, you can find a few very basic code samples that will introduce you to the
 world of OpenCV.
 .. include:: ../../definitions/tocDefinitions.rst
@ -189,6 +191,24 @@ Here you can read tutorials about how to set up your computer to work with the O
     .. |Install_iOS| image:: images/opencv_ios.png
                 :width:  90pt
 * **Embedded Linux**
  .. tabularcolumns:: m{100pt} m{300pt}
  .. cssclass:: toctableopencv
  =========== ======================================================
  |Usage_1|   **Title:** :ref:`ARM-Linux-cross-compile`
              *Compatibility:* > OpenCV 2.4.4
              *Author:* |Author_AlexS|
              We will learn how to set up OpenCV cross compilation environment for ARM Linux.
  =========== ======================================================
 * **Common**
  .. tabularcolumns:: m{100pt} m{300pt}
  .. cssclass:: toctableopencv
@ -249,7 +269,7 @@ Here you can read tutorials about how to set up your computer to work with the O
   \pagebreak
-.. We use a custom table of content format and as the table of content only imforms Sphinx about the hierarchy of the files, no need to show it.
+.. We use a custom table of content format and as the table of content only informs Sphinx about the hierarchy of the files, no need to show it.
 .. toctree::
   :hidden:
@ -263,6 +283,7 @@ Here you can read tutorials about how to set up your computer to work with the O
   ../android_binary_package/O4A_SDK
   ../android_binary_package/dev_with_OCV_on_Android
   ../ios_install/ios_install
   ../crosscompilation/arm_crosscompile_with_cmake
   ../display_image/display_image
   ../load_save_image/load_save_image
   ../how_to_write_a_tutorial/how_to_write_a_tutorial
--- a/modules/core/doc/basic_structures.rst
+++ b/modules/core/doc/basic_structures.rst
@ -418,27 +418,47 @@ Template class for smart reference-counting pointers ::
    };
-The ``Ptr<_Tp>`` class is a template class that wraps pointers of the corresponding type. It is similar to ``shared_ptr`` that is part of the Boost library (
+The ``Ptr<_Tp>`` class is a template class that wraps pointers of the corresponding type. It is
-http://www.boost.org/doc/libs/1_40_0/libs/smart_ptr/shared_ptr.htm
+similar to ``shared_ptr`` that is part of the Boost library
-) and also part of the `C++0x <http://en.wikipedia.org/wiki/C++0x>`_
+(http://www.boost.org/doc/libs/1_40_0/libs/smart_ptr/shared_ptr.htm) and also part of the
-standard.
+`C++0x <http://en.wikipedia.org/wiki/C++0x>`_ standard.
 This class provides the following options:
 *
-    Default constructor, copy constructor, and assignment operator for an arbitrary C++ class or a C structure. For some objects, like files, windows, mutexes, sockets, and others, a copy constructor or an assignment operator are difficult to define. For some other objects, like complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally, some of complex OpenCV and your own data structures may be written in C. However, copy constructors and default constructors can simplify programming a lot. Besides, they are often required (for example, by STL containers). By wrapping a pointer to such a complex object ``TObj``     to ``Ptr<TObj>`` , you automatically get all of the necessary constructors and the assignment operator.
+    Default constructor, copy constructor, and assignment operator for an arbitrary C++ class
    or a C structure. For some objects, like files, windows, mutexes, sockets, and others, a copy
    constructor or an assignment operator are difficult to define. For some other objects, like
    complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally,
    some of complex OpenCV and your own data structures may be written in C.
    However, copy constructors and default constructors can simplify programming a lot. Besides,
    they are often required (for example, by STL containers). By wrapping a pointer to such a
    complex object ``TObj`` to ``Ptr<TObj>``, you automatically get all of the necessary
    constructors and the assignment operator.
 *
-    *O(1)* complexity of the above-mentioned operations. While some structures, like ``std::vector``,   provide a copy constructor and an assignment operator, the operations may take a considerable amount of time if the data structures are large. But if the structures are put into ``Ptr<>``     , the overhead is small and independent of the data size.
+    *O(1)* complexity of the above-mentioned operations. While some structures, like ``std::vector``,
    provide a copy constructor and an assignment operator, the operations may take a considerable
    amount of time if the data structures are large. But if the structures are put into ``Ptr<>``,
    the overhead is small and independent of the data size.
 *
    Automatic destruction, even for C structures. See the example below with ``FILE*``.
 *
-    Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers can store only objects of the same type and the same size. The classical solution to store objects of different types in the same container is to store pointers to the base class ``base_class_t*``     instead but then you loose the automatic memory management. Again, by using ``Ptr<base_class_t>()``     instead of the raw pointers, you can solve the problem.
+    Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers
    can store only objects of the same type and the same size. The classical solution to store objects
    of different types in the same container is to store pointers to the base class ``base_class_t*``
    instead but then you lose the automatic memory management. Again, by using ``Ptr<base_class_t>()``
    instead of the raw pointers, you can solve the problem.
-The ``Ptr`` class treats the wrapped object as a black box. The reference counter is allocated and managed separately. The only thing the pointer class needs to know about the object is how to deallocate it. This knowledge is encapsulated in the ``Ptr::delete_obj()`` method that is called when the reference counter becomes 0. If the object is a C++ class instance, no additional coding is needed, because the default implementation of this method calls ``delete obj;`` .
+The ``Ptr`` class treats the wrapped object as a black box. The reference counter is allocated and
-However, if the object is deallocated in a different way, the specialized method should be created. For example, if you want to wrap ``FILE`` , the ``delete_obj`` may be implemented as follows: ::
+managed separately. The only thing the pointer class needs to know about the object is how to
 deallocate it. This knowledge is encapsulated in the ``Ptr::delete_obj()`` method that is called when
 the reference counter becomes 0. If the object is a C++ class instance, no additional coding is
 needed, because the default implementation of this method calls ``delete obj;``. However, if the
 object is deallocated in a different way, the specialized method should be created. For example,
 if you want to wrap ``FILE``, the ``delete_obj`` may be implemented as follows: ::
    template<> inline void Ptr<FILE>::delete_obj()
    {
@ -456,7 +476,73 @@ However, if the object is deallocated in a different way, the specialized method
    // the file will be closed automatically by the Ptr<FILE> destructor.
-.. note:: The reference increment/decrement operations are implemented as atomic operations, and therefore it is normally safe to use the classes in multi-threaded applications. The same is true for :ocv:class:`Mat` and other C++ OpenCV classes that operate on the reference counters.
+.. note:: The reference increment/decrement operations are implemented as atomic operations,
          and therefore it is normally safe to use the classes in multi-threaded applications.
          The same is true for :ocv:class:`Mat` and other C++ OpenCV classes that operate on
          the reference counters.
 Ptr::Ptr
 --------
 Various Ptr constructors.
 .. ocv:function:: Ptr::Ptr()
 .. ocv:function:: Ptr::Ptr(_Tp* _obj)
 .. ocv:function:: Ptr::Ptr(const Ptr& ptr)
 Ptr::~Ptr
 ---------
 The Ptr destructor.
 .. ocv:function:: Ptr::~Ptr()
 Ptr::operator =
 ----------------
 Assignment operator.
 .. ocv:function:: Ptr& Ptr::operator = (const Ptr& ptr)
 Decrements own reference counter (with ``release()``) and increments ptr's reference counter.
 Ptr::addref
 -----------
 Increments reference counter.
 .. ocv:function:: void Ptr::addref()
 Ptr::release
 ------------
 Decrements reference counter; when it becomes 0, ``delete_obj()`` is called.
 .. ocv:function:: void Ptr::release()
 Ptr::delete_obj
 ---------------
 User-specified custom object deletion operation. By default, ``delete obj;`` is called.
 .. ocv:function:: void Ptr::delete_obj()
 Ptr::empty
 ----------
 Returns true if ``obj == 0``.
 .. ocv:function:: bool Ptr::empty() const
 Ptr::operator ->
 ----------------
 Provide access to the object fields and methods.
 .. ocv:function:: template<typename _Tp> _Tp* Ptr::operator -> ()
 .. ocv:function:: template<typename _Tp> const _Tp* Ptr::operator -> () const
 Ptr::operator _Tp*
 ------------------
 Returns the underlying object pointer. Thanks to the methods, the ``Ptr<_Tp>`` can be used instead
 of ``_Tp*``.
 .. ocv:function:: template<typename _Tp> Ptr::operator _Tp* ()
 .. ocv:function:: template<typename _Tp> Ptr::operator const _Tp*() const
 Mat
 ---
--- a/modules/core/include/opencv2/core/gpumat.hpp
+++ b/modules/core/include/opencv2/core/gpumat.hpp
@ -73,12 +73,16 @@ namespace cv { namespace gpu
        FEATURE_SET_COMPUTE_20 = 20,
        FEATURE_SET_COMPUTE_21 = 21,
        FEATURE_SET_COMPUTE_30 = 30,
        FEATURE_SET_COMPUTE_35 = 35,
        GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
        SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
        NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
-        WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30
+        WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
        DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
    };
    // Checks whether current device supports the given feature
    CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
    // Gives information about what GPU archs this OpenCV GPU module was
@ -116,8 +120,9 @@ namespace cv { namespace gpu
        int multiProcessorCount() const { return multi_processor_count_; }
-        size_t sharedMemPerBlock() const { return sharedMemPerBlock_; }
+        size_t sharedMemPerBlock() const;
        void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
        size_t freeMemory() const;
        size_t totalMemory() const;
@ -131,7 +136,6 @@ namespace cv { namespace gpu
    private:
        void query();
        void queryMemory(size_t& free_memory, size_t& total_memory) const;
        int device_id_;
@ -139,7 +143,6 @@ namespace cv { namespace gpu
        int multi_processor_count_;
        int majorVersion_;
        int minorVersion_;
        size_t sharedMemPerBlock_;
    };
    CV_EXPORTS void printCudaDeviceInfo(int device);
@ -546,13 +549,6 @@ namespace cv { namespace gpu
    {
        ensureSizeIsEnough(size.height, size.width, type, m);
    }
    // Returns a rows x cols matrix of the given type, backed by `mat`.
    //
    // If `mat` is non-empty, already has the requested `type`, and has at least
    // `rows` rows and `cols` columns, the result is the sub-matrix
    // mat(Rect(0, 0, cols, rows)) anchored at the origin. Otherwise `mat` is
    // overwritten with a newly constructed GpuMat(rows, cols, type), and that
    // matrix is returned.
    // NOTE(review): presumably intended to avoid re-allocating a scratch buffer
    // between calls -- confirm against callers.
    inline GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat)
    {
        // Existing buffer is compatible and large enough: hand back its top-left region.
        if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
            return mat(Rect(0, 0, cols, rows));
        // Otherwise replace the caller's buffer and return the replacement.
        return mat = GpuMat(rows, cols, type);
    }
 }}
 #endif // __cplusplus
--- a/modules/core/perf/perf_compare.cpp
+++ b/modules/core/perf/perf_compare.cpp
@ -52,7 +52,8 @@ PERF_TEST_P( Size_MatType_CmpType, compareScalar,
    declare.in(src1, src2, WARMUP_RNG).out(dst);
-    TEST_CYCLE() cv::compare(src1, src2, dst, cmpType);
+    int runs = (sz.width <= 640) ? 8 : 1;
    TEST_CYCLE_MULTIRUN(runs) cv::compare(src1, src2, dst, cmpType);
    SANITY_CHECK(dst);
 }
--- a/modules/core/perf/perf_convertTo.cpp
+++ b/modules/core/perf/perf_convertTo.cpp
@ -29,9 +29,9 @@ PERF_TEST_P( Size_DepthSrc_DepthDst_Channels_alpha, convertTo,
    Mat src(sz, CV_MAKETYPE(depthSrc, channels));
    randu(src, 0, 255);
    Mat dst(sz, CV_MAKETYPE(depthDst, channels));
    declare.iterations(500);
-    TEST_CYCLE() src.convertTo(dst, depthDst, alpha);
+    int runs = (sz.width <= 640) ? 8 : 1;
    TEST_CYCLE_MULTIRUN(runs) src.convertTo(dst, depthDst, alpha);
    SANITY_CHECK(dst, alpha == 1.0 ? 1e-12 : 1e-7);
 }
--- a/modules/core/perf/perf_mat.cpp
+++ b/modules/core/perf/perf_mat.cpp
@ -18,7 +18,8 @@ PERF_TEST_P(Size_MatType, Mat_Eye,
    declare.out(diagonalMatrix);
-    TEST_CYCLE()
+    int runs = (size.width <= 640) ? 15 : 5;
    TEST_CYCLE_MULTIRUN(runs)
    {
        diagonalMatrix = Mat::eye(size, type);
    }
@ -38,7 +39,8 @@ PERF_TEST_P(Size_MatType, Mat_Zeros,
    declare.out(zeroMatrix);
-    TEST_CYCLE()
+    int runs = (size.width <= 640) ? 15 : 5;
    TEST_CYCLE_MULTIRUN(runs)
    {
        zeroMatrix = Mat::zeros(size, type);
    }
--- a/modules/core/perf/perf_merge.cpp
+++ b/modules/core/perf/perf_merge.cpp
@ -30,7 +30,8 @@ PERF_TEST_P( Size_SrcDepth_DstChannels, merge,
    }
    Mat dst;
-    TEST_CYCLE() merge( (vector<Mat> &)mv, dst );
+    int runs = (sz.width <= 640) ? 8 : 1;
    TEST_CYCLE_MULTIRUN(runs) merge( (vector<Mat> &)mv, dst );
    SANITY_CHECK(dst, 1e-12);
 }
--- a/modules/core/perf/perf_split.cpp
+++ b/modules/core/perf/perf_split.cpp
@ -26,8 +26,8 @@ PERF_TEST_P( Size_Depth_Channels, split,
    randu(m, 0, 255);
    vector<Mat> mv;
-
+    int runs = (sz.width <= 640) ? 8 : 1;
-    TEST_CYCLE() split(m, (vector<Mat>&)mv);
+    TEST_CYCLE_MULTIRUN(runs) split(m, (vector<Mat>&)mv);
    SANITY_CHECK(mv, 1e-12);
 }
--- a/modules/core/perf/perf_stat.cpp
+++ b/modules/core/perf/perf_stat.cpp
@ -97,7 +97,8 @@ PERF_TEST_P(Size_MatType, countNonZero, testing::Combine( testing::Values( TYPIC
    declare.in(src, WARMUP_RNG);
-    TEST_CYCLE() cnt = countNonZero(src);
+    int runs = (sz.width <= 640) ? 8 : 1;
    TEST_CYCLE_MULTIRUN(runs) cnt = countNonZero(src);
    SANITY_CHECK(cnt);
 }
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@ -48,8 +48,8 @@
    #include <cuda_runtime.h>
    #include <npp.h>
-    #define CUDART_MINIMUM_REQUIRED_VERSION 4010
+    #define CUDART_MINIMUM_REQUIRED_VERSION 4020
-    #define NPP_MINIMUM_REQUIRED_VERSION 4100
+    #define NPP_MINIMUM_REQUIRED_VERSION 4200
    #if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
        #error "Insufficient Cuda Runtime library version, please update it."
--- a/modules/gpu/doc/feature_detection_and_description.rst
+++ b/modules/gpu/doc/feature_detection_and_description.rst
@ -640,4 +640,3 @@ Converts matrices obtained via :ocv:func:`gpu::BFMatcher_GPU::radiusMatchSingle`
 .. ocv:function:: void gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
 If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -1507,6 +1507,7 @@ public:
    /* returns number of detected objects */
    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
    bool findLargestObject;
    bool visualizeInPlace;
@ -1519,9 +1520,6 @@ private:
    struct HaarCascade;
    struct LbpCascade;
    friend class CascadeClassifier_GPU_LBP;
 public:
    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
 };
 // ======================== GPU version for soft cascade ===================== //
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@ -778,6 +778,8 @@ NCVStatus loadFromXML(const std::string &filename,
    haar.bNeedsTiltedII = false;
    Ncv32u curMaxTreeDepth;
    std::vector<char> xmlFileCont;
    std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
    haarStages.resize(0);
    haarClassifierNodes.resize(0);
--- a/modules/gpu/src/hough.cpp
+++ b/modules/gpu/src/hough.cpp
@ -121,9 +121,7 @@ void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, f
    buf.accum.setTo(Scalar::all(0));
    DeviceInfo devInfo;
-    cudaDeviceProp prop;
+    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
    cudaSafeCall(cudaGetDeviceProperties(&prop, devInfo.deviceID()));
    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, prop.sharedMemPerBlock, devInfo.supports(FEATURE_SET_COMPUTE_20));
    ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
@ -196,9 +194,7 @@ void cv::gpu::HoughLinesP(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf,
    buf.accum.setTo(Scalar::all(0));
    DeviceInfo devInfo;
-    cudaDeviceProp prop;
+    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
    cudaSafeCall(cudaGetDeviceProperties(&prop, devInfo.deviceID()));
    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, prop.sharedMemPerBlock, devInfo.supports(FEATURE_SET_COMPUTE_20));
    ensureSizeIsEnough(1, maxLines, CV_32SC4, lines);
--- a/modules/highgui/include/opencv2/highgui/highgui.hpp
+++ b/modules/highgui/include/opencv2/highgui/highgui.hpp
@ -125,7 +125,7 @@ CV_EXPORTS_W void setTrackbarPos(const string& trackbarname, const string& winna
 // OpenGL support
-typedef void (CV_CDECL *OpenGlDrawCallback)(void* userdata);
+typedef void (*OpenGlDrawCallback)(void* userdata);
 CV_EXPORTS void setOpenGlDrawCallback(const string& winname, OpenGlDrawCallback onOpenGlDraw, void* userdata = 0);
 CV_EXPORTS void setOpenGlContext(const string& winname);
--- a/modules/highgui/src/cap_ffmpeg_impl.hpp
+++ b/modules/highgui/src/cap_ffmpeg_impl.hpp
@ -560,6 +560,10 @@ bool CvCapture_FFMPEG::open( const char* _filename )
        if( AVMEDIA_TYPE_VIDEO == enc->codec_type && video_stream < 0)
        {
            // backup encoder' width/height
            int enc_width = enc->width;
            int enc_height = enc->height;
            AVCodec *codec = avcodec_find_decoder(enc->codec_id);
            if (!codec ||
 #if LIBAVCODEC_VERSION_INT >= ((53<<16)+(8<<8)+0)
@ -570,6 +574,10 @@ bool CvCapture_FFMPEG::open( const char* _filename )
                < 0)
                goto exit_func;
            // checking width/height (since decoder can sometimes alter it, eg. vp6f)
            if (enc_width && (enc->width != enc_width)) { enc->width = enc_width; }
            if (enc_height && (enc->height != enc_height)) { enc->height = enc_height; }
            video_stream = i;
            video_st = ic->streams[i];
            picture = avcodec_alloc_frame();
--- a/modules/imgproc/perf/perf_cvt_color.cpp
+++ b/modules/imgproc/perf/perf_cvt_color.cpp
@ -299,10 +299,10 @@ PERF_TEST_P(Size_CvtMode2, cvtColorYUV420,
    Mat src(sz.height + sz.height / 2, sz.width, CV_8UC(ch.scn));
    Mat dst(sz, CV_8UC(ch.dcn));
    declare.time(100);
    declare.in(src, WARMUP_RNG).out(dst);
-    TEST_CYCLE() cvtColor(src, dst, mode, ch.dcn);
+    int runs = (sz.width <= 640) ? 8 : 1;
    TEST_CYCLE_MULTIRUN(runs) cvtColor(src, dst, mode, ch.dcn);
    SANITY_CHECK(dst, 1);
 }
--- a/modules/imgproc/perf/perf_matchTemplate.cpp
+++ b/modules/imgproc/perf/perf_matchTemplate.cpp
@ -33,7 +33,8 @@ PERF_TEST_P(ImgSize_TmplSize_Method, matchTemplateSmall,
    declare
        .in(img, WARMUP_RNG)
        .in(tmpl, WARMUP_RNG)
-        .out(result);
+        .out(result)
        .time(30);
    TEST_CYCLE() matchTemplate(img, tmpl, result, method);
@ -66,7 +67,8 @@ PERF_TEST_P(ImgSize_TmplSize_Method, matchTemplateBig,
    declare
        .in(img, WARMUP_RNG)
        .in(tmpl, WARMUP_RNG)
-        .out(result);
+        .out(result)
        .time(30);
    TEST_CYCLE() matchTemplate(img, tmpl, result, method);
--- a/modules/imgproc/perf/perf_threshold.cpp
+++ b/modules/imgproc/perf/perf_threshold.cpp
@ -31,9 +31,9 @@ PERF_TEST_P(Size_MatType_ThreshType, threshold,
    double maxval = theRNG().uniform(1, 254);
    declare.in(src, WARMUP_RNG).out(dst);
    declare.iterations(500);
-    TEST_CYCLE() threshold(src, dst, thresh, maxval, threshType);
+    int runs = (sz.width <= 640) ? 8 : 1;
    TEST_CYCLE_MULTIRUN(runs) threshold(src, dst, thresh, maxval, threshType);
    SANITY_CHECK(dst);
 }
--- a/modules/java/generator/src/java/android+OpenCVLoader.java
+++ b/modules/java/generator/src/java/android+OpenCVLoader.java
@ -17,6 +17,11 @@ public class OpenCVLoader
     */
    public static final String OPENCV_VERSION_2_4_3 = "2.4.3";
    /**
     * OpenCV Library version 2.4.4.
     */
    public static final String OPENCV_VERSION_2_4_4 = "2.4.4";
    /**
     * Loads and initializes OpenCV library from current application package. Roughly, it's an analog of system.loadLibrary("opencv_java").
     * @return Returns true if initialization of OpenCV was successful.
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@ -125,6 +125,24 @@ namespace cv
            Impl *impl;
        };
        //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
        //! @param clCxt              context whose command queue the kernel is enqueued on
        //! @param source             OpenCL program source text the kernel is obtained from
        //! @param kernelName         base kernel name; "_C<channels>" and "_D<depth>" are appended to it
        //!                           whenever the corresponding argument is not -1
        //! @param globalThreads      global work size; when localThreads is non-NULL each entry is rounded up
        //!                           in place to a multiple of the matching localThreads entry.
        //!                           NULL skips argument binding and the launch altogether.
        //! @param localThreads       work-group size, or NULL
        //! @param args               (size, pointer) pairs bound as kernel arguments 0, 1, 2, ... in order
        //! @param channels           channel count used for the "_C" name suffix, or -1 for no suffix
        //! @param depth              depth code used for the "_D" name suffix, or -1 for no suffix
        //! @param build_options      build options forwarded when the kernel is obtained from source
        //! @param finish             if true, clFinish is called on the command queue before returning
        //! @param measureKernelTime  if true, waits for the kernel and measures it through event profiling
        //! @param cleanUp            if true, the kernel object is released before returning
        //! @return the queued-to-end profiling delta divided by 1e6 when measureKernelTime is true
        //!         (presumably milliseconds, as OpenCL profiling counters are in nanoseconds); 0.0 otherwise
        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
                                                        const char **source, string kernelName,
                                                        size_t globalThreads[3], size_t localThreads[3],
                                                        std::vector< std::pair<size_t, const void *> > &args,
                                                        int channels, int depth, const char *build_options,
                                                        bool finish = true, bool measureKernelTime = false,
                                                        bool cleanUp = true);
        //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
        //! @param clCxt              context whose command queue the kernel is enqueued on
        //! @param fileName           array of numFiles paths; each file is read in full and its contents are
        //!                           used as OpenCL program source
        //! @param numFiles           number of entries in fileName
        //! @param kernelName         base kernel name; "_C<channels>" and "_D<depth>" are appended to it
        //!                           whenever the corresponding argument is not -1
        //! @param globalThreads      global work size, or NULL to skip the launch
        //! @param localThreads       work-group size, or NULL
        //! @param args               (size, pointer) pairs bound as kernel arguments 0, 1, 2, ... in order
        //! @param channels           channel count used for the "_C" name suffix, or -1 for no suffix
        //! @param depth              depth code used for the "_D" name suffix, or -1 for no suffix
        //! @param build_options      build options forwarded when the kernel is obtained from source
        //! @param finish             if true, clFinish is called on the command queue before returning
        //! @param measureKernelTime  if true, the kernel is timed through event profiling
        //! @param cleanUp            if true, the kernel object is released before returning
        //! @return the measured kernel time when measureKernelTime is true, otherwise 0.0
        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
                                                        const char **fileName, const int numFiles, string kernelName,
                                                        size_t globalThreads[3], size_t localThreads[3],
                                                        std::vector< std::pair<size_t, const void *> > &args,
                                                        int channels, int depth, const char *build_options,
                                                        bool finish = true, bool measureKernelTime = false,
                                                        bool cleanUp = true);
        class CV_EXPORTS oclMatExpr;
        //////////////////////////////// oclMat ////////////////////////////////
        class CV_EXPORTS oclMat
@ -469,10 +487,11 @@ namespace cv
        CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
        //! Logical operators
-        CV_EXPORTS oclMatExpr operator ~ (const oclMat &src);
+        CV_EXPORTS oclMat operator ~ (const oclMat &);
-        CV_EXPORTS oclMatExpr operator | (const oclMat &src1, const oclMat &src2);
+        CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
-        CV_EXPORTS oclMatExpr operator & (const oclMat &src1, const oclMat &src2);
+        CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
-        CV_EXPORTS oclMatExpr operator ^ (const oclMat &src1, const oclMat &src2);
+        CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
        //! Mathematics operators
        CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
--- a/modules/ocl/perf/perf_gemm.cpp
+++ b/modules/ocl/perf/perf_gemm.cpp
@ -109,5 +109,5 @@ TEST_P(Gemm, Performance)
 INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
                            testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
                            testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
-                            testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
+                            testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
 #endif
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@ -2125,22 +2125,22 @@ void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, c
        bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar);
 }
-oclMatExpr cv::ocl::operator ~ (const oclMat &src)
+oclMat cv::ocl::operator ~ (const oclMat &src)
 {
    return oclMatExpr(src, oclMat(), MAT_NOT);
 }
-oclMatExpr cv::ocl::operator | (const oclMat &src1, const oclMat &src2)
+oclMat cv::ocl::operator | (const oclMat &src1, const oclMat &src2)
 {
    return oclMatExpr(src1, src2, MAT_OR);
 }
-oclMatExpr cv::ocl::operator & (const oclMat &src1, const oclMat &src2)
+oclMat cv::ocl::operator & (const oclMat &src1, const oclMat &src2)
 {
    return oclMatExpr(src1, src2, MAT_AND);
 }
-oclMatExpr cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2)
+oclMat cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2)
 {
    return oclMatExpr(src1, src2, MAT_XOR);
 }
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@ -12,6 +12,7 @@
 //
 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
@ -23,6 +24,7 @@
 //    Zhang Ying, zhangying913@gmail.com
 //    Xu Pang, pangxu010@163.com
 //    Wu Zailong, bullet@yeah.net
 //    Wenju He, wenju@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@ -1524,7 +1526,7 @@ namespace cv
            mat_dst.create(mat_src.rows, mat_src.cols, CV_8UC1);
            oclMat mat_hist(1, 256, CV_32SC1);
-            //mat_hist.setTo(0);
+
            calcHist(mat_src, mat_hist);
            Context *clCxt = mat_src.clCxt;
@ -1533,10 +1535,10 @@ namespace cv
            size_t globalThreads[3] = { 256, 1, 1};
            oclMat lut(1, 256, CV_8UC1);
            vector<pair<size_t , const void *> > args;
-            float scale = 255.f / (mat_src.rows * mat_src.cols);
+            int total = mat_src.rows * mat_src.cols;
            args.push_back( make_pair( sizeof(cl_mem), (void *)&lut.data));
            args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_hist.data));
-            args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
+            args.push_back( make_pair( sizeof(int), (void *)&total));
            openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1);
            LUT(mat_src, lut, mat_dst);
        }
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@ -47,6 +47,7 @@
 #include "precomp.hpp"
 #include <iomanip>
 #include <fstream>
 #include "binarycaching.hpp"
 using namespace cv;
@ -730,6 +731,137 @@ namespace cv
 #endif
        }
       double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName,
                                 size_t globalThreads[3], size_t localThreads[3],
                                 vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
                                 bool finish, bool measureKernelTime, bool cleanUp)
        {
            // Kernel names follow the pattern functionName_Cn_Dn: C is the channel count and
            // D the data-type depth, each followed by an integer. For example, split_C2_D2 is
            // the split kernel for 2 channels and depth 2. A value of -1 omits that suffix.
            stringstream suffix;
            if(channels != -1)
                suffix << "_C" << channels;
            if(depth != -1)
                suffix << "_D" << depth;
            kernelName += suffix.str();

            cl_kernel kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
            double elapsed = 0.0;

            // A NULL global size means "do not launch": only the optional finish / clean-up below run.
            if( globalThreads != NULL)
            {
                if ( localThreads != NULL)
                {
                    // Round every dimension of the global size up to a whole number of work-groups.
                    for(int dim = 0; dim < 3; dim++)
                        globalThreads[dim] = divUp(globalThreads[dim], localThreads[dim]) * localThreads[dim];

                    cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
                }

                // Bind the (size, pointer) pairs as kernel arguments 0..N-1.
                for(size_t argIdx = 0; argIdx < args.size(); argIdx ++)
                {
                    const pair<size_t, const void *> &arg = args[argIdx];
                    openCLSafeCall(clSetKernelArg(kernel, argIdx, arg.first, arg.second));
                }

                // An event is only requested when the caller wants the launch timed.
                cl_event event = NULL;
                openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
                                localThreads, 0, NULL, measureKernelTime ? &event : NULL));

                if(measureKernelTime)
                {
                    cl_ulong end_time, queue_time;

                    openCLSafeCall(clWaitForEvents(1, &event));

                    openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                                    sizeof(cl_ulong), &end_time, 0));

                    openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
                                    sizeof(cl_ulong), &queue_time, 0));

                    // Queued-to-end delta, scaled down by 1e6.
                    elapsed = (double)(end_time - queue_time) / (1000 * 1000);

                    clReleaseEvent(event);
                }
            }

            if(finish)
                clFinish(clCxt->impl->clCmdQueue);

            if(cleanUp)
                openCLSafeCall(clReleaseKernel(kernel));

            return elapsed;
        }
        // Reads the entire contents of `filename` (opened in binary mode) into `s`.
        //
        // Returns 0 on success and -1 when the file cannot be opened, its size cannot be
        // determined, or fewer bytes than expected could be read. An error message is
        // printed in every failure case. `s` is only modified on success, and it then
        // holds exactly the bytes of the file (including any embedded NUL bytes).
        static int convertToString(const char *filename, std::string& s)
        {
            std::ifstream f(filename, std::ios::in | std::ios::binary);
            if(!f.is_open())
            {
                printf("Error: Failed to open file %s\n", filename);
                return -1;
            }

            // Determine the size by seeking to the end. tellg() reports -1 on failure,
            // which must not be turned into a (huge) unsigned allocation size.
            f.seekg(0, std::ios::end);
            const std::streamoff fileSize = f.tellg();
            f.seekg(0, std::ios::beg);
            if(fileSize < 0 || !f)
            {
                printf("Error: Failed to read file %s\n", filename);
                return -1;
            }

            // Read straight into a string buffer: no manual new[]/delete[] and no
            // dependence on NUL termination.
            std::string contents((size_t)fileSize, '\0');
            if(fileSize > 0)
                f.read(&contents[0], (std::streamsize)fileSize);
            if(!f)
            {
                printf("Error: Failed to read file %s\n", filename);
                return -1;
            }

            s.swap(contents);
            return 0;
        }
        // File-based variant: loads each of the `numFiles` files named in `fileName`,
        // then forwards their contents as program source to the source-based overload.
        // All remaining parameters and the return value are passed through unchanged.
        //
        // If any file cannot be loaded the kernel is not launched: the failure is
        // reported through openCLSafeCall and 0.0 is returned.
        double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName,
                                 size_t globalThreads[3], size_t localThreads[3],
                                 vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
                                 bool finish, bool measureKernelTime, bool cleanUp)
        {
            std::vector<std::string> fsource;
            for (int i = 0 ; i < numFiles ; i++)
            {
                std::string str;
                if (convertToString(fileName[i], str) < 0)
                {
                    // Previously a failed load was skipped here and the pointer table below was
                    // still indexed up to numFiles, reading past the end of fsource.
                    // NOTE(review): openCLSafeCall presumably raises on a non-success status;
                    // the explicit return keeps this path safe even if it does not.
                    openCLSafeCall(CL_INVALID_VALUE);
                    return 0.0;
                }
                fsource.push_back(str);
            }

            // Table of C strings borrowed from fsource. It is built only after fsource has
            // stopped growing so the pointers stay valid, and it is owned by a vector so
            // nothing leaks if the call below throws.
            std::vector<const char *> source;
            source.reserve(fsource.size());
            for (size_t i = 0 ; i < fsource.size() ; i++)
                source.push_back(fsource[i].c_str());

            const char **sourcePtr = source.empty() ? (const char **)NULL : &source[0];

            return openCLExecuteKernelInterop(clCxt, sourcePtr, kernelName, globalThreads, localThreads,
                                 args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
        }
       cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
                             const size_t size)
        {
--- a/modules/ocl/src/kernels/imgproc_histogram.cl
+++ b/modules/ocl/src/kernels/imgproc_histogram.cl
@ -3,12 +3,14 @@
 //
 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
 //    Niko Li, newlife20080214@gmail.com
 //    Jia Haipeng, jiahaipeng95@gmail.com
 //    Xu Pang, pangxu010@163.com
 //    Wenju He, wenju@multicorewareinc.com
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
@ -189,24 +191,27 @@ __kernel __attribute__((reqd_work_group_size(256,1,1)))void merge_hist(__global
 __kernel __attribute__((reqd_work_group_size(256,1,1)))void calLUT(
                            __global uchar * dst,
                            __constant int * hist,
-                            float scale)
+                            int total)
 {
    int lid = get_local_id(0);
-    __local int sumhist[HISTOGRAM256_BIN_COUNT];
+    __local int sumhist[HISTOGRAM256_BIN_COUNT+1];
    //__local uchar lut[HISTOGRAM256_BIN_COUNT+1];
    sumhist[lid]=hist[lid];
    barrier(CLK_LOCAL_MEM_FENCE);
    if(lid==0)
    {
        int sum = 0;
-        for(int i=0;i<HISTOGRAM256_BIN_COUNT;i++)
+        int i = 0;
        while (!sumhist[i]) ++i;
        sumhist[HISTOGRAM256_BIN_COUNT] = sumhist[i];
        for(sumhist[i++] = 0; i<HISTOGRAM256_BIN_COUNT; i++)
        {
            sum+=sumhist[i];
            sumhist[i]=sum;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    float scale = 255.f/(total - sumhist[HISTOGRAM256_BIN_COUNT]);
    dst[lid]= lid == 0 ? 0 : convert_uchar_sat(convert_float(sumhist[lid])*scale);
 }
 /*
--- a/modules/ocl/src/kernels/meanShift.cl
+++ b/modules/ocl/src/kernels/meanShift.cl
@ -12,11 +12,13 @@
 //
 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
 //    Shengen Yan,yanshengen@gmail.com
 //    Xu Pang, pangxu010@163.com
 //    Wenju He, wenju@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@ -43,12 +45,6 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #if defined (DOUBLE_SUPPORT)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 typedef double F;
 #else
 typedef float F;
 #endif
 short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
               __global uchar4* in, int in_step, int dst_off, int src_off,
@ -184,12 +180,11 @@ short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
        if( count == 0 )
            break;
-        F  icount = 1.0/count;
+        int x1 = sx/count;
-        int x1 = convert_int_rtz(sx*icount);
+        int y1 = sy/count;
-        int y1 = convert_int_rtz(sy*icount);
+        s.x = s.x/count;
-        s.x = convert_int_rtz(s.x*icount);
+        s.y = s.y/count;
-        s.y = convert_int_rtz(s.y*icount);
+        s.z = s.z/count;
        s.z = convert_int_rtz(s.z*icount);
        int4 tmp = s - convert_int4(c);
        int norm2 = tmp.x * tmp.x + tmp.y *  tmp.y +
--- a/modules/ocl/src/mcwutil.cpp
+++ b/modules/ocl/src/mcwutil.cpp
@ -46,6 +46,9 @@
 #include "mcwutil.hpp"
 #if defined (HAVE_OPENCL)
 #ifndef CL_VERSION_1_2
 #define CL_VERSION_1_2 0
 #endif
 using namespace std;
@ -123,6 +126,101 @@ namespace cv
            openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
                                  build_options, finish_mode);
        }
       // Creates a 2D OpenCL image of mat.cols x mat.rows and copies the contents of `mat`
       // into it. The image has no row padding: when the matrix rows are padded
       // (mat.step != mat.cols * mat.elemSize()) the data is first packed into a temporary
       // buffer. The caller owns the returned image and must release it.
       //
       // Throws std::exception for a depth other than CV_8U / CV_32S / CV_32F or a channel
       // count other than 1, 3 or 4. OpenCL failures are reported through openCLSafeCall,
       // and the image is released before a copy failure is reported.
       cl_mem bindTexture(const oclMat &mat)
        {
            cl_mem texture;
            cl_image_format format;
            int err = CL_SUCCESS;
            int depth    = mat.depth();
            int channels = mat.channels();

            switch(depth)
            {
            case CV_8U:
                format.image_channel_data_type = CL_UNSIGNED_INT8;
                break;
            case CV_32S:
                // NOTE(review): signed 32-bit data is exposed through the unsigned channel
                // type; confirm that consuming kernels read it accordingly.
                format.image_channel_data_type = CL_UNSIGNED_INT32;
                break;
            case CV_32F:
                format.image_channel_data_type = CL_FLOAT;
                break;
            default:
                throw std::exception();
                break;
            }
            switch(channels)
            {
            case 1:
                format.image_channel_order     = CL_R;
                break;
            case 3:
                format.image_channel_order     = CL_RGB;
                break;
            case 4:
                format.image_channel_order     = CL_RGBA;
                break;
            default:
                throw std::exception();
                break;
            }
 #if CL_VERSION_1_2
            cl_image_desc desc;
            desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
            desc.image_width      = mat.cols;
            desc.image_height     = mat.rows;
            desc.image_depth      = 0;
            desc.image_array_size = 1;
            desc.image_row_pitch  = 0;
            desc.image_slice_pitch = 0;
            desc.buffer           = NULL;
            desc.num_mip_levels   = 0;
            desc.num_samples      = 0;
            texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
 #else
            texture = clCreateImage2D(
                mat.clCxt->impl->clContext,
                CL_MEM_READ_WRITE,
                &format,
                mat.cols,
                mat.rows,
                0,
                NULL,
                &err);
 #endif
            // Report a failed image creation before any copy is enqueued into it.
            openCLSafeCall(err);

            const size_t rowBytes = mat.cols * mat.elemSize();
            const bool   isPadded = rowBytes != mat.step;
            size_t origin[] = { 0, 0, 0 };
            size_t region[] = { (size_t)mat.cols, (size_t)mat.rows, 1 };

            cl_mem devData = (cl_mem)mat.data;
            if (isPadded)
            {
                // Pack the padded rows into a tightly laid out temporary buffer first.
                devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY,
                    rowBytes * mat.rows, NULL, &err);
                if (err == CL_SUCCESS)
                {
                    const size_t packedRegion[3] = { rowBytes, (size_t)mat.rows, 1 };
                    err = clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
                        packedRegion, mat.step, 0, rowBytes, 0, 0, NULL, NULL);
                }
            }

            if (err == CL_SUCCESS)
            {
                err = clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
            }

            if (isPadded && devData != NULL)
            {
                // Let the pending copies finish before the temporary buffer is dropped.
                clFinish(mat.clCxt->impl->clCmdQueue);
                clReleaseMemObject(devData);
            }

            if (err != CL_SUCCESS)
            {
                // Do not hand out (or leak) an image whose contents were never written.
                clReleaseMemObject(texture);
                texture = NULL;
            }
            openCLSafeCall(err);

            return texture;
        }
        // Counterpart of bindTexture(): hands the given image object to openCLFree().
        void releaseTexture(cl_mem& texture)
        {
            openCLFree(texture);
        }
    }//namespace ocl
 }//namespace cv
--- a/modules/ocl/src/mcwutil.hpp
+++ b/modules/ocl/src/mcwutil.hpp
@ -67,6 +67,12 @@ namespace cv
        void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
                                  size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
                                  int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
        // bind oclMat to OpenCL image textures
        // note:
        //   1. there is no memory management. The user needs to explicitly release the resource
        //   2. for faster clamping, there is no buffer padding for the constructed texture
        cl_mem bindTexture(const oclMat &mat);
        void releaseTexture(cl_mem& texture);
    }//namespace ocl
 }//namespace cv
--- a/modules/ocl/src/pyrlk.cpp
+++ b/modules/ocl/src/pyrlk.cpp
@ -10,10 +10,15 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
 //		Dachuan Zhao, dachuan@multicorewareinc.com
 //		Yao Wang, yao@multicorewareinc.com
 //      Nathan, liujun@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
@ -22,13 +27,13 @@
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
-//     and/or other GpuMaterials provided with the distribution.
+//     and/or other oclMaterials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
-// any express or bpied warranties, including, but not limited to, the bpied
+// any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
@ -40,6 +45,7 @@
 //
 //M*/
 #include "precomp.hpp"
 #include "mcwutil.hpp"
 using namespace std;
@ -568,197 +574,16 @@ static void pyrDown_cus(const oclMat &src, oclMat &dst)
    pyrdown_run_cus(src, dst);
 }
 //struct MultiplyScalar
 //{
 //    MultiplyScalar(double val_, double scale_) : val(val_), scale(scale_) {}
 //    double operator ()(double a) const
 //    {
 //        return (scale * a * val);
 //    }
 //    const double val;
 //    const double scale;
 //};
 //
 //void callF(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
 //{
 //	Mat srcTemp;
 //	Mat dstTemp;
 //	src.download(srcTemp);
 //	dst.download(dstTemp);
 //
 //	int i;
 //	int j;
 //	int k;
 //	for(i = 0; i < srcTemp.rows; i++)
 //	{
 //		for(j = 0; j < srcTemp.cols; j++)
 //		{
 //			for(k = 0; k < srcTemp.channels(); k++)
 //			{
 //				((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]);
 //			}
 //		}
 //	}
 //
 //	dst = dstTemp;
 //}
 //
 //static inline bool isAligned(const unsigned char* ptr, size_t size)
 //{
 //    return reinterpret_cast<size_t>(ptr) % size == 0;
 //}
 //
 //static inline bool isAligned(size_t step, size_t size)
 //{
 //    return step % size == 0;
 //}
 //
 //void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
 //{
 //    if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
 //        !isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double)))
 //    {
 //        callF(src, dst, op, mask);
 //        return;
 //    }
 //
 //	Mat srcTemp;
 //	Mat dstTemp;
 //	src.download(srcTemp);
 //	dst.download(dstTemp);
 //
 //	int x_shifted;
 //
 //	int i;
 //	int j;
 //	for(i = 0; i < srcTemp.rows; i++)
 //	{
 //		const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows;
 //        double* dstRow = (double*)dstTemp.data + i * dstTemp.rows;;
 //
 //		for(j = 0; j < srcTemp.cols; j++)
 //		{
 //			x_shifted = j * 4;
 //
 //			if(x_shifted + 4 - 1 < srcTemp.cols)
 //			{
 //				dstRow[x_shifted    ] = op(srcRow[x_shifted    ]);
 //				dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]);
 //				dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]);
 //				dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]);
 //			}
 //			else
 //			{
 //				for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x)
 //				{
 //					((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]);
 //				}
 //			}
 //		}
 //	}
 //}
 //
 //void multiply(const oclMat& src1, double val, oclMat& dst, double scale = 1.0f);
 //void multiply(const oclMat& src1, double val, oclMat& dst, double scale)
 //{
 //    MultiplyScalar op(val, scale);
 //	//if(src1.channels() == 1 && dst.channels() == 1)
 //	//{
 //	//    callT(src1, dst, op, 0);
 //	//}
 //	//else
 //	//{
 //	    callF(src1, dst, op, 0);
 //	//}
 //}
 // Creates a 2D OpenCL image covering the full (padded) rows of `mat`
 // (mat.step / mat.elemSize() pixels wide, mat.rows high) and copies the matrix
 // buffer into it. `depth` selects the channel data type (0 -> 8-bit unsigned,
 // 5 -> float) and `channels` the channel order (1, 3 or 4).
 // OpenCL failures are reported through openCLSafeCall. The caller releases the
 // returned image.
 static cl_mem bindTexture(const oclMat &mat, int depth, int channels)
 {
    cl_mem texture;
    // Zero-initialised so that an unsupported depth / channel count yields an
    // invalid format that image creation rejects, instead of uninitialised garbage.
    cl_image_format format = { 0, 0 };
    int err = CL_SUCCESS;
    if(depth == 0)
    {
        format.image_channel_data_type = CL_UNSIGNED_INT8;
    }
    else if(depth == 5)
    {
        format.image_channel_data_type = CL_FLOAT;
    }
    if(channels == 1)
    {
        format.image_channel_order     = CL_R;
    }
    else if(channels == 3)
    {
        format.image_channel_order     = CL_RGB;
    }
    else if(channels == 4)
    {
        format.image_channel_order     = CL_RGBA;
    }
 #ifdef CL_VERSION_1_2
    cl_image_desc desc;
    desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width      = mat.step / mat.elemSize();
    desc.image_height     = mat.rows;
    desc.image_depth      = 0;
    desc.image_array_size = 1;
    desc.image_row_pitch  = 0;
    desc.image_slice_pitch = 0;
    desc.buffer           = NULL;
    desc.num_mip_levels   = 0;
    desc.num_samples      = 0;
    texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
 #else
    texture = clCreateImage2D(
                  mat.clCxt->impl->clContext,
                  CL_MEM_READ_WRITE,
                  &format,
                  mat.step / mat.elemSize(),
                  mat.rows,
                  0,
                  NULL,
                  &err);
 #endif
    // Report a failed creation before anything is copied into the image.
    openCLSafeCall(err);

    size_t origin[] = { 0, 0, 0 };
    size_t region[] = { mat.step / mat.elemSize(), (size_t)mat.rows, 1 };
    err = clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0);
    if(err != CL_SUCCESS)
    {
        // Do not hand out (or leak) an image whose contents were never written.
        clReleaseMemObject(texture);
        texture = NULL;
    }
    openCLSafeCall(err);
    return texture;
 }
 // Hands the given image object to openCLFree(); intended for images obtained from bindTexture().
 static void releaseTexture(cl_mem texture)
 {
    openCLFree(texture);
 }
 static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
 {
    Context  *clCxt = I.clCxt;
    char platform[256] = {0};
    cl_platform_id pid;
    clGetDeviceInfo(clCxt->impl->devices, CL_DEVICE_PLATFORM, sizeof(pid), &pid, NULL);
    clGetPlatformInfo(pid, CL_PLATFORM_NAME, 256, platform, NULL);
    std::string namestr = platform;
    bool isImageSupported = true;
    if(namestr.find("NVIDIA")!=string::npos || namestr.find("Intel")!=string::npos)
        isImageSupported = false;
    int elemCntPerRow = I.step / I.elemSize();
    string kernelName = "lkSparse";
-
+    size_t localThreads[3]  = { 8, 8, 1 };
-
+    size_t globalThreads[3] = { 8 * ptcount, 8, 1};
    size_t localThreads[3]  = { 8, isImageSupported?8:32, 1 };
    size_t globalThreads[3] = { 8 * ptcount, isImageSupported?8:32, 1};
    int cn = I.oclchannels();
    char calcErr;
    if (level == 0)
    {
@ -770,22 +595,11 @@ static void lkSparse_run(oclMat &I, oclMat &J,
    }
    vector<pair<size_t , const void *> > args;
-    cl_mem ITex;
+    cl_mem ITex = bindTexture(I);
-    cl_mem JTex;
+    cl_mem JTex = bindTexture(J);
    if (isImageSupported)
    {
        ITex = bindTexture(I, I.depth(), cn);
        JTex = bindTexture(J, J.depth(), cn);
    }
    else
    {
        ITex = (cl_mem)I.data;
        JTex = (cl_mem)J.data;
    }
    args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex ));
    //cl_mem clmD = clCreateBuffer(clCxt, CL_MEM_READ_WRITE, ptcount * sizeof(float), NULL, NULL);
    args.push_back( make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&prevPts.step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
@ -795,10 +609,6 @@ static void lkSparse_run(oclMat &I, oclMat &J,
    args.push_back( make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
    if (!isImageSupported)
    {
        args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
    }
    args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cn ));
@ -806,18 +616,20 @@ static void lkSparse_run(oclMat &I, oclMat &J,
    args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
    //args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS ));
-    if (isImageSupported)
+    try
    {
        openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
-
+    }
    catch(Exception&)
    {
        printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
        releaseTexture(ITex);
        releaseTexture(JTex);
-    }
+        ITex = (cl_mem)I.data;
-    else
+        JTex = (cl_mem)J.data;
-    {
+        localThreads[1] = globalThreads[1] = 32;
-        //printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
+        args.insert( args.begin()+11, make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
    }
 }
@ -927,8 +739,6 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
    size_t localThreads[3]  = { 16, 16, 1 };
    size_t globalThreads[3] = { I.cols, I.rows, 1};
    int cn = I.oclchannels();
    bool calcErr;
    if (err)
    {
@ -944,8 +754,8 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
    if (isImageSupported)
    {
-        ITex = bindTexture(I, I.depth(), cn);
+        ITex = bindTexture(I);
-        JTex = bindTexture(J, J.depth(), cn);
+        JTex = bindTexture(J);
    }
    else
    {
--- a/modules/ocl/test/test_gemm.cpp
+++ b/modules/ocl/test/test_gemm.cpp
@ -81,5 +81,5 @@ TEST_P(Gemm, Accuracy)
 INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
                            testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
                            testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
-                            testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
+                            testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
 #endif
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@ -183,12 +183,11 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
        if( count == 0 )
            break;
-        double icount = 1.0 / count;
+        int x1 = sx / count;
-        int x1 = cvFloor(sx * icount);
+        int y1 = sy / count;
-        int y1 = cvFloor(sy * icount);
+        s0 = s0 / count;
-        s0 = cvFloor(s0 * icount);
+        s1 = s1 / count;
-        s1 = cvFloor(s1 * icount);
+        s2 = s2 / count;
        s2 = cvFloor(s2 * icount);
        bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
@ -1370,9 +1369,7 @@ TEST_P(meanShiftFiltering, Mat)
        gdst.download(cpu_gdst);
        char sss[1024];
        char warning[300] = "Warning: If the selected device doesn't support double, a deviation will exist.\nIf the accuracy is acceptable, please ignore it.\n";
        sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
        strcat(sss, warning);
        EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
    }
@ -1398,9 +1395,7 @@ TEST_P(meanShiftProc, Mat)
        gdstCoor.download(cpu_gdstCoor);
        char sss[1024];
        char warning[300] = "Warning: If the selected device doesn't support double, a deviation will exist.\nIf the accuracy is acceptable, please ignore it.\n";
        sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
        strcat(sss, warning);
        EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
        EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0, sss);
    }
--- a/modules/ts/src/ts_arrtest.cpp
+++ b/modules/ts/src/ts_arrtest.cpp
@ -296,37 +296,15 @@ int ArrayTest::validate_test_results( int test_case_idx )
        for( j = 0; j < sizei; j++ )
        {
            double err_level;
            vector<int> idx;
            double max_diff = 0;
            int code;
            char msg[100];
            if( !test_array[i1][j] )
                continue;
            err_level = get_success_error_level( test_case_idx, i0, (int)j );
-            code = cmpEps( test_mat[i0][j], test_mat[i1][j], &max_diff, err_level, &idx, element_wise_relative_error );
+            code = cmpEps2(ts, test_mat[i0][j], test_mat[i1][j], err_level, element_wise_relative_error, arr_names[i0]);
-            switch( code )
+            if (code == 0) continue;
            {
            case -1:
                sprintf( msg, "Too big difference (=%g)", max_diff );
                code = TS::FAIL_BAD_ACCURACY;
                break;
            case -2:
                strcpy( msg, "Invalid output" );
                code = TS::FAIL_INVALID_OUTPUT;
                break;
            case -3:
                strcpy( msg, "Invalid output in the reference array" );
                code = TS::FAIL_INVALID_OUTPUT;
                break;
            default:
                continue;
            }
            string idxstr = vec2str(", ", &idx[0], idx.size());
            ts->printf( TS::LOG, "%s in %s array %d at (%s)", msg, arr_names[i0], j, idxstr.c_str() );
            for( i0 = 0; i0 < (int)test_array.size(); i0++ )
            {
--- a/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@ -1934,6 +1934,10 @@ int check( const Mat& a, double fmin, double fmax, vector<int>* _idx )
    return idx == 0 ? 0 : -1;
 }
 #define CMP_EPS_OK 0
 #define CMP_EPS_BIG_DIFF -1
 #define CMP_EPS_INVALID_TEST_DATA -2 // there is NaN or Inf value in test data
 #define CMP_EPS_INVALID_REF_DATA -3 // there is NaN or Inf value in reference data
 // compares two arrays. max_diff is the maximum actual difference,
 // success_err_level is maximum allowed difference, idx is the index of the first
@ -1946,7 +1950,7 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
    CV_Assert( arr.type() == refarr.type() && arr.size == refarr.size );
    int ilevel = refarr.depth() <= CV_32S ? cvFloor(success_err_level) : 0;
-    int result = 0;
+    int result = CMP_EPS_OK;
    const Mat *arrays[]={&arr, &refarr, 0};
    Mat planes[2];
@ -1998,13 +2002,13 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
                    continue;
                if( cvIsNaN(a_val) || cvIsInf(a_val) )
                {
-                    result = -2;
+                    result = CMP_EPS_INVALID_TEST_DATA;
                    idx = startidx + j;
                    break;
                }
                if( cvIsNaN(b_val) || cvIsInf(b_val) )
                {
-                    result = -3;
+                    result = CMP_EPS_INVALID_REF_DATA;
                    idx = startidx + j;
                    break;
                }
@ -2029,13 +2033,13 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
                    continue;
                if( cvIsNaN(a_val) || cvIsInf(a_val) )
                {
-                    result = -2;
+                    result = CMP_EPS_INVALID_TEST_DATA;
                    idx = startidx + j;
                    break;
                }
                if( cvIsNaN(b_val) || cvIsInf(b_val) )
                {
-                    result = -3;
+                    result = CMP_EPS_INVALID_REF_DATA;
                    idx = startidx + j;
                    break;
                }
@ -2051,7 +2055,7 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
            break;
        default:
            assert(0);
-            return -1;
+            return CMP_EPS_BIG_DIFF;
        }
        if(_realmaxdiff)
            *_realmaxdiff = MAX(*_realmaxdiff, realmaxdiff);
@ -2060,7 +2064,7 @@ int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
    }
    if( result == 0 && idx != 0 )
-        result = -1;
+        result = CMP_EPS_BIG_DIFF;
    if( result < -1 && _realmaxdiff )
        *_realmaxdiff = exp(1000.);
@ -2081,15 +2085,15 @@ int cmpEps2( TS* ts, const Mat& a, const Mat& b, double success_err_level,
    switch( code )
    {
-    case -1:
+    case CMP_EPS_BIG_DIFF:
        sprintf( msg, "%s: Too big difference (=%g)", desc, diff );
        code = TS::FAIL_BAD_ACCURACY;
        break;
-    case -2:
+    case CMP_EPS_INVALID_TEST_DATA:
        sprintf( msg, "%s: Invalid output", desc );
        code = TS::FAIL_INVALID_OUTPUT;
        break;
-    case -3:
+    case CMP_EPS_INVALID_REF_DATA:
        sprintf( msg, "%s: Invalid reference output", desc );
        code = TS::FAIL_INVALID_OUTPUT;
        break;
--- a/modules/video/perf/perf_tvl1optflow.cpp
+++ b/modules/video/perf/perf_tvl1optflow.cpp
@ -13,7 +13,7 @@ pair<string, string> impair(const char* im1, const char* im2)
 PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, testing::Values(impair("cv/optflow/RubberWhale1.png", "cv/optflow/RubberWhale2.png")))
 {
-    declare.time(40);
+    declare.time(260);
    Mat frame1 = imread(getDataPath(GetParam().first), IMREAD_GRAYSCALE);
    Mat frame2 = imread(getDataPath(GetParam().second), IMREAD_GRAYSCALE);
--- a/samples/gpu/morphology.cpp
+++ b/samples/gpu/morphology.cpp
@ -640,4 +640,3 @@ Converts matrices obtained via :ocv:func:`gpu::BFMatcher_GPU::radiusMatchSingle`
	.. ocv:function:: void gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)

	If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.