merged 2.4 into trunk

2012-04-30 14:33:52 +00:00
parent 3f1c6d7357
commit d5a0088bbe
194 changed files with 10158 additions and 8225 deletions
--- a/modules/core/doc/basic_structures.rst
+++ b/modules/core/doc/basic_structures.rst
@@ -171,8 +171,49 @@ RotatedRect
 -----------
 .. ocv:class:: RotatedRect

-Template class for rotated rectangles specified by the center, size, and the rotation angle in degrees.
+The class represents rotated (i.e. not up-right) rectangles on a plane. Each rectangle is specified by the center point (mass center), length of each side (represented by cv::Size2f structure) and the rotation angle in degrees.

+    .. ocv:function:: RotatedRect::RotatedRect()
+    .. ocv:function:: RotatedRect::RotatedRect(const Point2f& center, const Size2f& size, float angle)
+    .. ocv:function:: RotatedRect::RotatedRect(const CvBox2D& box)
+
+        :param center: The rectangle mass center.
+        :param size: Width and height of the rectangle.
+        :param angle: The rotation angle in a clockwise direction. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle.
+        :param box: The rotated rectangle parameters as the obsolete CvBox2D structure.
+
+    .. ocv:function:: void RotatedRect::points(Point2f* pts) const
+    .. ocv:function:: Rect RotatedRect::boundingRect() const
+    .. ocv:function:: RotatedRect::operator CvBox2D() const
+
+        :param pts: The points array for storing rectangle vertices.
+
+The sample below demonstrates how to use RotatedRect:
+
+::
+
+    Mat image(200, 200, CV_8UC3, Scalar(0));
+    RotatedRect rRect = RotatedRect(Point2f(100,100), Size2f(100,50), 30);
+
+    Point2f vertices[4];
+    rRect.points(vertices);
+    for (int i = 0; i < 4; i++)
+        line(image, vertices[i], vertices[(i+1)%4], Scalar(0,255,0));
+
+    Rect brect = rRect.boundingRect();
+    rectangle(image, brect, Scalar(255,0,0));
+
+    imshow("rectangles", image);
+    waitKey(0);
+
+.. image:: pics/rotatedrect.png
+
+.. seealso::
+
+    :ocv:cfunc:`CamShift`,
+    :ocv:func:`fitEllipse`,
+    :ocv:func:`minAreaRect`,
+    :ocv:struct:`CvBox2D`

 TermCriteria
 ------------
@@ -1773,6 +1814,84 @@ To use ``Mat_`` for multi-channel images/matrices, pass ``Vec`` as a ``Mat_`` pa
            img(i,j)[2] ^= (uchar)(i ^ j);


+InputArray
+----------
+
+This is the proxy class for passing read-only input arrays into OpenCV functions. It is defined as ::
+
+    typedef const _InputArray& InputArray;
+    
+where ``_InputArray`` is a class that can be constructed from ``Mat``, ``Mat_<T>``, ``Matx<T, m, n>``, ``std::vector<T>``, ``std::vector<std::vector<T> >`` or ``std::vector<Mat>``. It can also be constructed from a matrix expression.
+
+Since this is mostly an implementation-level class, and its interface may change in future versions, we do not describe it in detail. There are a few key things, though, that should be kept in mind:
+
+  * When you see in the reference manual or in OpenCV source code a function that takes ``InputArray``, it means that you can actually pass ``Mat``, ``Matx``, ``vector<T>`` etc. (see above the complete list).
+  
+  * Optional input arguments: If some of the input arrays may be empty, pass ``cv::noArray()`` (or simply ``cv::Mat()`` as you probably did before).
+
+  * The class is designed solely for passing parameters. That is, normally you *should not* declare class members, local and global variables of this type.
+  
+  * If you want to design your own function or a class method that can operate on arrays of multiple types, you can use ``InputArray`` (or ``OutputArray``) for the respective parameters. Inside a function you should use the ``_InputArray::getMat()`` method to construct a matrix header for the array (without copying data). ``_InputArray::kind()`` can be used to distinguish ``Mat`` from ``vector<>`` etc., but normally it is not needed.
+  
+Here is how you can use a function that takes ``InputArray`` ::
+
+    std::vector<Point2f> vec;
+    // points on a circle
+    for( int i = 0; i < 30; i++ )
+        vec.push_back(Point2f((float)(100 + 30*cos(i*CV_PI*2/5)),
+                              (float)(100 - 30*sin(i*CV_PI*2/5))));
+    cv::transform(vec, vec, cv::Matx23f(0.707, -0.707, 10, 0.707, 0.707, 20));
+
+That is, we form an STL vector containing points, and apply in-place affine transformation to the vector using the 2x3 matrix created inline as ``Matx<float, 2, 3>`` instance.
+
+Here is how such a function can be implemented (for simplicity, we implement a very specific case of it, according to the assertion statement inside) ::
+
+    void myAffineTransform(InputArray _src, OutputArray _dst, InputArray _m)
+    {
+        // get Mat headers for input arrays. This is O(1) operation,
+        // unless _src and/or _m are matrix expressions.
+        Mat src = _src.getMat(), m = _m.getMat();
+        CV_Assert( src.type() == CV_32FC2 && m.type() == CV_32F && m.size() == Size(3, 2) );
+        
+        // [re]create the output array so that it has the proper size and type.
+        // In case of Mat it calls Mat::create, in case of STL vector it calls vector::resize.
+        _dst.create(src.size(), src.type());
+        Mat dst = _dst.getMat();
+        
+        for( int i = 0; i < src.rows; i++ )
+            for( int j = 0; j < src.cols; j++ )
+            {
+                Point2f pt = src.at<Point2f>(i, j);
+                dst.at<Point2f>(i, j) = Point2f(m.at<float>(0, 0)*pt.x +
+                                                m.at<float>(0, 1)*pt.y +
+                                                m.at<float>(0, 2),
+                                                m.at<float>(1, 0)*pt.x +
+                                                m.at<float>(1, 1)*pt.y +
+                                                m.at<float>(1, 2));
+            }
+    }
+
+There is another related type, ``InputArrayOfArrays``, which is currently defined as a synonym for ``InputArray``: ::
+
+    typedef InputArray InputArrayOfArrays;
+    
+It denotes function arguments that are either vectors of vectors or vectors of matrices. A separate synonym is needed to generate Python/Java etc. wrappers properly. At the function implementation level their use is similar, but ``_InputArray::getMat(idx)`` should be used to get header for the idx-th component of the outer vector and ``_InputArray::size().area()`` should be used to find the number of components (vectors/matrices) of the outer vector.
+
+
+OutputArray
+-----------
+
+This type is very similar to ``InputArray`` except that it is used for input/output and output function parameters. Just like with ``InputArray``, OpenCV users should not care about ``OutputArray``, they just pass ``Mat``, ``vector<T>`` etc. to the functions. The same limitation as for ``InputArray``: **Do not explicitly create OutputArray instances** applies here too.
+
+If you want to make your function polymorphic (i.e. accept different arrays as output parameters), it is also not very difficult. Take the sample above as the reference. Note that ``_OutputArray::create()`` needs to be called before ``_OutputArray::getMat()``. This way you guarantee that the output array is properly allocated.
+
+Optional output parameters. If you do not need certain output array to be computed and returned to you, pass ``cv::noArray()``, just like you would in the case of optional input array. At the implementation level, use ``_OutputArray::needed()`` to check if certain output array needs to be computed or not.
+
+There are several synonyms for ``OutputArray`` that are used to assist automatic Python/Java/... wrapper generators: ::
+
+    typedef OutputArray OutputArrayOfArrays;
+    typedef OutputArray InputOutputArray;
+    typedef OutputArray InputOutputArrayOfArrays;

 NAryMatIterator
 ---------------
@@ -2189,3 +2308,160 @@ It simplifies notation of some operations. ::
    M.ref(1, 2, 3) = M(4, 5, 6) + M(7, 8, 9);


+Algorithm
+---------
+
+This is a base class for all more or less complex algorithms in OpenCV, especially for classes of algorithms, for which there can be multiple implementations. The examples are stereo correspondence (for which there are algorithms like block matching, semi-global block matching, graph-cut etc.), background subtraction (which can be done using mixture-of-gaussians models, codebook-based algorithm etc.), optical flow (block matching, Lucas-Kanade, Horn-Schunck etc.).
+
+The class provides the following features for all derived classes:
+
+    * so called "virtual constructor". That is, each Algorithm derivative is registered at program start and you can get the list of registered algorithms and create instance of a particular algorithm by its name (see ``Algorithm::create``). If you plan to add your own algorithms, it is good practice to add a unique prefix to your algorithms to distinguish them from other algorithms.
+    
+    * setting/retrieving algorithm parameters by name. If you used video capturing functionality from OpenCV highgui module, you are probably familiar with ``cvSetCaptureProperty()``, ``cvGetCaptureProperty()``, ``VideoCapture::set()`` and ``VideoCapture::get()``. ``Algorithm`` provides similar methods where instead of integer id's you specify the parameter names as text strings. See ``Algorithm::set`` and ``Algorithm::get`` for details.
+
+    * reading and writing parameters from/to XML or YAML files. Every Algorithm derivative can store all its parameters and then read them back. There is no need to re-implement it each time.
+    
+Here is an example of using SIFT in your application via the Algorithm interface: ::
+
+    #include "opencv2/opencv.hpp"
+
+    ...
+
+    initModule_nonfree(); // to load SURF/SIFT etc.
+    
+    Ptr<Feature2D> sift = Algorithm::create<Feature2D>("Feature2D.SIFT");
+    
+    FileStorage fs("sift_params.xml", FileStorage::READ);
+    if( fs.isOpened() ) // if we have file with parameters, read them
+    {
+        sift->read(fs["sift_params"]);
+        fs.release();
+    }
+    else // else modify the parameters and store them; user can later edit the file to use different parameters
+    {
+        sift->set("contrastThreshold", 0.01f); // lower the contrast threshold, compared to the default value
+        
+        {
+        WriteStructContext ws(fs, "sift_params", CV_NODE_MAP);
+        sift->write(fs);
+        }
+    }
+    
+    Mat image = imread("myimage.png", 0), descriptors;
+    vector<KeyPoint> keypoints;
+    (*sift)(image, noArray(), keypoints, descriptors);
+    
+
+Algorithm::get
+--------------
+Returns the algorithm parameter
+
+.. ocv:function:: template<typename _Tp> typename ParamType<_Tp>::member_type get(const string& name) const
+
+    :param name: The parameter name.
+
+The method returns value of the particular parameter. Since the compiler can not deduce the type of the returned parameter, you should specify it explicitly in angle brackets. Here are the allowed forms of get:
+
+    * myalgo.get<int>("param_name")
+    * myalgo.get<double>("param_name")
+    * myalgo.get<bool>("param_name")
+    * myalgo.get<string>("param_name")
+    * myalgo.get<Mat>("param_name")
+    * myalgo.get<vector<Mat> >("param_name")
+    * myalgo.get<Algorithm>("param_name") (it returns Ptr<Algorithm>).
+
+In some cases the actual type of the parameter can be cast to the specified type, e.g. an integer parameter can be cast to double, ``bool`` can be cast to ``int``. But "dangerous" transformations (string<->number, double->int, 1x1 Mat<->number, ...) are not performed and the method will throw an exception. In the case of ``Mat`` or ``vector<Mat>`` parameters the method does not clone the matrix data, so do not modify the matrices. Use ``Algorithm::set`` instead - slower, but safer.
+
+
+Algorithm::set
+--------------
+Sets the algorithm parameter
+
+.. ocv:function:: void set(const string& name, int value)
+.. ocv:function:: void set(const string& name, double value)
+.. ocv:function:: void set(const string& name, bool value)
+.. ocv:function:: void set(const string& name, const string& value)
+.. ocv:function:: void set(const string& name, const Mat& value)
+.. ocv:function:: void set(const string& name, const vector<Mat>& value)
+.. ocv:function:: void set(const string& name, const Ptr<Algorithm>& value)
+
+    :param name: The parameter name.
+    :param value: The parameter value.
+
+The method sets the value of the particular parameter. Some of the algorithm parameters may be declared as read-only. If you try to set such a parameter, you will get an exception with the corresponding error message.
+
+
+Algorithm::write
+----------------
+Stores algorithm parameters in a file storage
+
+.. ocv:function:: void write(FileStorage& fs) const
+
+    :param fs: File storage.
+    
+The method stores all the algorithm parameters (in alphabetic order) to the file storage. The method is virtual. If you define your own Algorithm derivative, you can override the method and store some extra information. However, it's rarely needed. Here are some examples:
+
+ * SIFT feature detector (from nonfree module). The class only stores algorithm parameters and no keypoints or their descriptors. Therefore, it's enough to store the algorithm parameters, which is what ``Algorithm::write()`` does. Therefore, there is no dedicated ``SIFT::write()``.
+ 
+ * Background subtractor (from video module). It has the algorithm parameters and also it has the current background model. However, the background model is not stored. First, it's rather big. Then, if you have stored the background model, it would likely become irrelevant on the next run (because of shifted camera, changed background, different lighting etc.). Therefore, ``BackgroundSubtractorMOG`` and ``BackgroundSubtractorMOG2`` also rely on the standard ``Algorithm::write()`` to store just the algorithm parameters.
+ 
+ * Expectation Maximization (from ml module). The algorithm finds mixture of gaussians that approximates user data best of all. In this case the model may be re-used on the next run to test new data against the trained statistical model. So EM needs to store the model. However, since the model is described by a few parameters that are available as read-only algorithm parameters (i.e. they are available via ``EM::get()``), EM also relies on ``Algorithm::write()`` to store both EM parameters and the model (represented by read-only algorithm parameters).
+ 
+
+Algorithm::read
+---------------
+Reads algorithm parameters from a file storage
+
+.. ocv:function:: void read(const FileNode& fn)
+
+    :param fn: File node of the file storage.
+    
+The method reads all the algorithm parameters from the specified node of a file storage. Similarly to ``Algorithm::write()``, if you implement an algorithm that needs to read some extra data and/or re-compute some internal data, you may override the method.
+
+Algorithm::getList
+------------------
+Returns the list of registered algorithms
+
+.. ocv:function:: void getList(vector<string>& algorithms)
+
+    :param algorithms: The output vector of algorithm names.
+    
+This static method returns the list of registered algorithms in alphabetical order.
+
+
+Algorithm::getList
+------------------
+Returns the list of registered algorithms
+
+.. ocv:function:: void getList(vector<string>& algorithms)
+
+    :param algorithms: The output vector of algorithm names.
+    
+This static method returns the list of registered algorithms in alphabetical order.
+
+
+Algorithm::create
+-----------------
+Creates algorithm instance by name
+
+.. ocv:function:: template<typename _Tp> Ptr<_Tp> create(const string& name)
+
+    :param name: The algorithm name, one of the names returned by ``Algorithm::getList()``.
+    
+This static method creates a new instance of the specified algorithm. If there is no such algorithm, the method will silently return null pointer (that can be checked by ``Ptr::empty()`` method). Also, you should specify the particular ``Algorithm`` subclass as ``_Tp`` (or simply ``Algorithm`` if you do not know it at that point). ::
+
+    Ptr<BackgroundSubtractor> bgfg = Algorithm::create<BackgroundSubtractor>("BackgroundSubtractor.MOG2");
+    
+.. note:: This is an important note about seemingly mysterious behavior of ``Algorithm::create()`` when it returns NULL while it should not. The reason is simple - ``Algorithm::create()`` resides in OpenCV's core module and the algorithms are implemented in other modules. If you create algorithms dynamically, C++ linker may decide to throw away the modules where the actual algorithms are implemented, since you do not call any functions from the modules. To avoid this problem, you need to call ``initModule_<modulename>();`` somewhere in the beginning of the program before ``Algorithm::create()``. For example, call ``initModule_nonfree()`` in order to use SURF/SIFT, call ``initModule_ml()`` to use expectation maximization etc.
+    
+Creating Own Algorithms
+-----------------------
+
+The above methods are usually enough for users. If you want to make your own algorithm, derived from ``Algorithm``, you should basically follow a few conventions and add a little semi-standard piece of code to your class:
+
+ * Make a class and specify ``Algorithm`` as its base class.
+ * The algorithm parameters should be the class members. See ``Algorithm::get()`` for the list of possible types of the parameters.
+ * Add public virtual method ``AlgorithmInfo* info() const;`` to your class.
+ * Add constructor function, ``AlgorithmInfo`` instance and implement the ``info()`` method. The simplest way is to take  http://code.opencv.org/svn/opencv/trunk/opencv/modules/ml/src/ml_init.cpp as the reference and modify it according to the list of your parameters.
+ * Add some public function (e.g. ``initModule_<mymodule>()``) that calls info() of your algorithm and put it into the same source file as ``info()`` implementation. This is to force C++ linker to include this object file into the target application. See ``Algorithm::create()`` for details.
+ 
--- a/modules/core/doc/old_basic_structures.rst
+++ b/modules/core/doc/old_basic_structures.rst
@@ -195,6 +195,29 @@ Stores coordinates of a rectangle.
    
 .. seealso:: :ocv:class:`Rect\_`

+
+CvBox2D
+-------
+
+.. ocv:struct:: CvBox2D
+
+Stores coordinates of a rotated rectangle.
+
+    .. ocv:member:: CvPoint2D32f center
+
+        Center of the box
+
+    .. ocv:member:: CvSize2D32f  size
+
+        Box width and height
+
+    .. ocv:member:: float angle
+
+        Angle between the horizontal axis and the first side (i.e. length) in degrees
+
+.. seealso:: :ocv:class:`RotatedRect`
+
+
 CvScalar
 --------

--- a/modules/core/doc/pics/rotatedrect.png
+++ b/modules/core/doc/pics/rotatedrect.png
--- a/modules/core/include/opencv2/core/core.hpp
+++ b/modules/core/include/opencv2/core/core.hpp
@@ -2020,6 +2020,15 @@ public:
 };

    
+typedef void (*BinaryFunc)(const uchar* src1, size_t step1,
+                           const uchar* src2, size_t step2,
+                           uchar* dst, size_t step, Size sz,
+                           void*);
+
+CV_EXPORTS BinaryFunc getConvertFunc(int sdepth, int ddepth);
+CV_EXPORTS BinaryFunc getConvertScaleFunc(int sdepth, int ddepth);
+CV_EXPORTS BinaryFunc getCopyMaskFunc(size_t esz);    
+    
 //! swaps two matrices
 CV_EXPORTS void swap(Mat& a, Mat& b);
    
@@ -2133,11 +2142,11 @@ CV_EXPORTS Mat repeat(const Mat& src, int ny, int nx);
        
 CV_EXPORTS void hconcat(const Mat* src, size_t nsrc, OutputArray dst);
 CV_EXPORTS void hconcat(InputArray src1, InputArray src2, OutputArray dst);
-CV_EXPORTS_W void hconcat(InputArray src, OutputArray dst);
+CV_EXPORTS_W void hconcat(InputArrayOfArrays src, OutputArray dst);

 CV_EXPORTS void vconcat(const Mat* src, size_t nsrc, OutputArray dst);
 CV_EXPORTS void vconcat(InputArray src1, InputArray src2, OutputArray dst);
-CV_EXPORTS_W void vconcat(InputArray src, OutputArray dst);
+CV_EXPORTS_W void vconcat(InputArrayOfArrays src, OutputArray dst);
    
 //! computes bitwise conjunction of the two arrays (dst = src1 & src2)
 CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2,
@@ -2205,6 +2214,9 @@ CV_EXPORTS_W void magnitude(InputArray x, InputArray y, OutputArray magnitude);
 //! checks that each matrix element is within the specified range.
 CV_EXPORTS_W bool checkRange(InputArray a, bool quiet=true, CV_OUT Point* pos=0,
                            double minVal=-DBL_MAX, double maxVal=DBL_MAX);
+//! converts NaN's to the given number
+CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val=0);
+    
 //! implements generalized matrix product algorithm GEMM from BLAS
 CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, double alpha,
                       InputArray src3, double gamma, OutputArray dst, int flags=0);
@@ -4263,7 +4275,7 @@ template<typename _Tp> struct ParamType {};
 /*!
  Base class for high-level OpenCV algorithms
 */    
-class CV_EXPORTS Algorithm
+class CV_EXPORTS_W Algorithm
 {
 public:
    Algorithm();
@@ -4272,13 +4284,22 @@ public:
    
    template<typename _Tp> typename ParamType<_Tp>::member_type get(const string& name) const;
    template<typename _Tp> typename ParamType<_Tp>::member_type get(const char* name) const;
-    void set(const string& name, int value);
-    void set(const string& name, double value);
-    void set(const string& name, bool value);
-    void set(const string& name, const string& value);
-    void set(const string& name, const Mat& value);
-    void set(const string& name, const vector<Mat>& value);
-    void set(const string& name, const Ptr<Algorithm>& value);
+    
+    CV_WRAP int getInt(const string& name) const;
+    CV_WRAP double getDouble(const string& name) const;
+    CV_WRAP bool getBool(const string& name) const;
+    CV_WRAP string getString(const string& name) const;
+    CV_WRAP Mat getMat(const string& name) const;
+    CV_WRAP vector<Mat> getMatVector(const string& name) const;
+    CV_WRAP Ptr<Algorithm> getAlgorithm(const string& name) const;
+    
+    CV_WRAP_AS(setInt) void set(const string& name, int value);
+    CV_WRAP_AS(setDouble) void set(const string& name, double value);
+    CV_WRAP_AS(setBool) void set(const string& name, bool value);
+    CV_WRAP_AS(setString) void set(const string& name, const string& value);
+    CV_WRAP_AS(setMat) void set(const string& name, const Mat& value);
+    CV_WRAP_AS(setMatVector) void set(const string& name, const vector<Mat>& value);
+    CV_WRAP_AS(setAlgorithm) void set(const string& name, const Ptr<Algorithm>& value);
    
    void set(const char* name, int value);
    void set(const char* name, double value);
@@ -4288,10 +4309,10 @@ public:
    void set(const char* name, const vector<Mat>& value);
    void set(const char* name, const Ptr<Algorithm>& value);
    
-    string paramHelp(const string& name) const;
+    CV_WRAP string paramHelp(const string& name) const;
    int paramType(const char* name) const;
-    int paramType(const string& name) const;
-    void getParams(vector<string>& names) const;
+    CV_WRAP int paramType(const string& name) const;
+    CV_WRAP void getParams(CV_OUT vector<string>& names) const;
    
    
    virtual void write(FileStorage& fs) const;
@@ -4301,8 +4322,8 @@ public:
    typedef int (Algorithm::*Getter)() const;
    typedef void (Algorithm::*Setter)(int);
    
-    static void getList(vector<string>& algorithms);
-    static Ptr<Algorithm> _create(const string& name);
+    CV_WRAP static void getList(CV_OUT vector<string>& algorithms);
+    CV_WRAP static Ptr<Algorithm> _create(const string& name);
    template<typename _Tp> static Ptr<_Tp> create(const string& name);
    
    virtual AlgorithmInfo* info() const /* TODO: make it = 0;*/ { return 0; }
@@ -4312,10 +4333,10 @@ public:
 class CV_EXPORTS AlgorithmInfo
 {
 public:
+    friend class Algorithm;
    AlgorithmInfo(const string& name, Algorithm::Constructor create);
    ~AlgorithmInfo();
    void get(const Algorithm* algo, const char* name, int argType, void* value) const;
-    void set(Algorithm* algo, const char* name, int argType, const void* value) const;
    void addParam_(Algorithm& algo, const char* name, int argType,
                   void* value, bool readOnly, 
                   Algorithm::Getter getter, Algorithm::Setter setter,
@@ -4365,6 +4386,8 @@ public:
                  const string& help=string());
 protected:
    AlgorithmInfoData* data;
+    void set(Algorithm* algo, const char* name, int argType,
+              const void* value, bool force=false) const;
 };


--- a/modules/core/include/opencv2/core/devmem2d.hpp
+++ b/modules/core/include/opencv2/core/devmem2d.hpp
@@ -1,161 +1,161 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other GpuMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_CORE_DevMem2D_HPP__
-#define __OPENCV_CORE_DevMem2D_HPP__
-
-#ifdef __cplusplus
-
-#ifdef __CUDACC__ 
-    #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__ 
-#else
-    #define __CV_GPU_HOST_DEVICE__
-#endif
-
-namespace cv
-{    
-    namespace gpu
-    {
-        // Simple lightweight structures that encapsulates information about an image on device.
-        // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
-
-        template <bool expr> struct StaticAssert;
-        template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
-
-		template<typename T> struct DevPtr
-		{
-			typedef T elem_type;
-			typedef int index_type;
-
-			enum { elem_size = sizeof(elem_type) };
-
-			T* data;
-
-			__CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
-			__CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
-
-			__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
-			__CV_GPU_HOST_DEVICE__ operator       T*()       { return data; }
-			__CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
-		};
-		
-		template<typename T> struct PtrSz : public DevPtr<T>
-        {                     
-            __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
-            __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
-
-            size_t size;
-        };
-
-		template<typename T> struct PtrStep : public DevPtr<T>
-        {   
-            __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
-			__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
-
-            /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
-            size_t step;            
-
-			__CV_GPU_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
-            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
-
-			__CV_GPU_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
-            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
-        };
-
-		template <typename T> struct PtrStepSz : public PtrStep<T>
-        {   
-            __CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
-            __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_) 
-                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
-
-            int cols;
-            int rows;                                                                              
-        };
-
-		template <typename T> struct DevMem2D_ : public PtrStepSz<T>
-        {            
-            DevMem2D_() {}
-			DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
-                            
-            template <typename U>            
-			explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}                                                                
-        };
-		               
-        template<typename T> struct PtrElemStep_ : public PtrStep<T>
-        {                   
-            PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) 
-            {
-                StaticAssert<256 % sizeof(T) == 0>::check();
-
-                PtrStep<T>::step /= PtrStep<T>::elem_size;             
-            }
-            __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
-            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }  
-
-            __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
-            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }                  
-        };
-
-		template<typename T> struct PtrStep_ : public PtrStep<T>
-        {            
-            PtrStep_() {}
-            PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}                        
-        };
-
-        typedef DevMem2D_<unsigned char> DevMem2Db;
-		typedef DevMem2Db DevMem2D;
-        typedef DevMem2D_<float> DevMem2Df;
-        typedef DevMem2D_<int> DevMem2Di;
-
-        typedef PtrStep<unsigned char> PtrStepb;
-        typedef PtrStep<float> PtrStepf;
-        typedef PtrStep<int> PtrStepi;
-
-        typedef PtrElemStep_<unsigned char> PtrElemStep;
-        typedef PtrElemStep_<float> PtrElemStepf;
-        typedef PtrElemStep_<int> PtrElemStepi;		
-    }    
-}
-
-#endif // __cplusplus
-
-#endif /* __OPENCV_GPU_DevMem2D_HPP__ */
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_DevMem2D_HPP__
+#define __OPENCV_CORE_DevMem2D_HPP__
+
+#ifdef __cplusplus
+
+#ifdef __CUDACC__
+    #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
+#else
+    #define __CV_GPU_HOST_DEVICE__
+#endif
+
+namespace cv
+{
+    namespace gpu
+    {
+        // Simple lightweight structures that encapsulate information about an image on the device.
+        // They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
+
+        template <bool expr> struct StaticAssert;
+        template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
+
+		template<typename T> struct DevPtr
+		{
+			typedef T elem_type;
+			typedef int index_type;
+
+            enum { elem_size = sizeof(elem_type) };
+
+            T* data;
+
+            __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
+            __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
+
+            __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
+            __CV_GPU_HOST_DEVICE__ operator       T*()       { return data; }
+            __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
+        };
+
+        template<typename T> struct PtrSz : public DevPtr<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
+
+            size_t size;
+        };
+
+        template<typename T> struct PtrStep : public DevPtr<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
+
+            /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
+            size_t step;
+
+            __CV_GPU_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
+            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
+
+            __CV_GPU_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
+            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
+        };
+
+        template <typename T> struct PtrStepSz : public PtrStep<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
+                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
+
+            int cols;
+            int rows;
+        };
+
+        template <typename T> struct DevMem2D_ : public PtrStepSz<T>
+        {
+            DevMem2D_() {}
+            DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
+
+            template <typename U>
+            explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
+        };
+
+        template<typename T> struct PtrElemStep_ : public PtrStep<T> // PtrStep variant that keeps step in units of T instead of bytes
+        {
+            PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step)
+            {
+                StaticAssert<256 % sizeof(T) == 0>::check(); // instantiates StaticAssert<true> only when sizeof(T) divides 256
+                // mem.step was copied as-is above; rescale it to a count of T elements (integer division)
+                PtrStep<T>::step /= PtrStep<T>::elem_size;
+            }
+            __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
+            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }
+            // (y, x) access goes through the element-stepped ptr() above, which uses plain T* arithmetic
+            __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
+            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
+        };
+
+        template<typename T> struct PtrStep_ : public PtrStep<T>
+        {
+            PtrStep_() {}
+            PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}
+        };
+
+        typedef DevMem2D_<unsigned char> DevMem2Db;
+        typedef DevMem2Db DevMem2D;
+        typedef DevMem2D_<float> DevMem2Df;
+        typedef DevMem2D_<int> DevMem2Di;
+
+        typedef PtrStep<unsigned char> PtrStepb;
+        typedef PtrStep<float> PtrStepf;
+        typedef PtrStep<int> PtrStepi;
+
+        typedef PtrElemStep_<unsigned char> PtrElemStep;
+        typedef PtrElemStep_<float> PtrElemStepf;
+        typedef PtrElemStep_<int> PtrElemStepi;
+    }
+}
+
+#endif // __cplusplus
+
+#endif /* __OPENCV_CORE_DevMem2D_HPP__ */
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -561,18 +561,28 @@ template<typename _Tp> inline const _Tp& Mat::at(Point pt) const

 template<typename _Tp> inline _Tp& Mat::at(int i0)
 {
-    CV_DbgAssert( dims <= 2 && data && (size.p[0] == 1 || size.p[1] == 1) &&
-                 (unsigned)i0 < (unsigned)(size.p[0] + size.p[1] - 1) &&
+    CV_DbgAssert( dims <= 2 && data &&
+                 (unsigned)i0 < (unsigned)(size.p[0]*size.p[1]) &&
                 elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
-    return *(_Tp*)(data + step.p[size.p[0]==1]*i0);
+    if( isContinuous() || size.p[0] == 1 )
+        return ((_Tp*)data)[i0];
+    if( size.p[1] == 1 )
+        return *(_Tp*)(data + step.p[0]*i0);
+    int i = i0/cols, j = i0 - i*cols;
+    return ((_Tp*)(data + step.p[0]*i))[j];
 }
    
 template<typename _Tp> inline const _Tp& Mat::at(int i0) const
 {
-    CV_DbgAssert( dims <= 2 && data && (size.p[0] == 1 || size.p[1] == 1) &&
-                 (unsigned)i0 < (unsigned)(size.p[0] + size.p[1] - 1) &&
+    CV_DbgAssert( dims <= 2 && data &&
+                 (unsigned)i0 < (unsigned)(size.p[0]*size.p[1]) &&
                 elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
-    return *(_Tp*)(data + step.p[size.p[0]==1]*i0);
+    if( isContinuous() || size.p[0] == 1 )
+        return ((const _Tp*)data)[i0];
+    if( size.p[1] == 1 )
+        return *(const _Tp*)(data + step.p[0]*i0);
+    int i = i0/cols, j = i0 - i*cols;
+    return ((const _Tp*)(data + step.p[0]*i))[j];
 }
    
 template<typename _Tp> inline _Tp& Mat::at(int i0, int i1, int i2)
--- a/modules/core/include/opencv2/core/operations.hpp
+++ b/modules/core/include/opencv2/core/operations.hpp
@@ -52,7 +52,7 @@
 #ifdef __cplusplus

 /////// exchange-add operation for atomic operations on reference counters ///////
-#ifdef __INTEL_COMPILER // atomic increment on the Intel(tm) compiler
+#if defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)   // atomic increment on the linux version of the Intel(tm) compiler
  #define CV_XADD(addr,delta) _InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
 #elif defined __GNUC__
    
@@ -638,6 +638,14 @@ Matx<_Tp, m, n> operator * (const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b)
 }

    
+template<typename _Tp, int m, int n> static inline
+Vec<_Tp, m> operator * (const Matx<_Tp, m, n>& a, const Vec<_Tp, n>& b) // (m x n) matrix times n-vector, returned as an m-vector
+{
+    Matx<_Tp, m, 1> c(a, b, Matx_MatMulOp()); // product is built by the Matx constructor tagged with Matx_MatMulOp
+    return reinterpret_cast<const Vec<_Tp, m>&>(c); // NOTE(review): assumes Vec<_Tp, m> has the same layout as Matx<_Tp, m, 1> - confirm; returned by value, so c is copied out
+}
+    
+    
 template<typename _Tp> static inline
 Point_<_Tp> operator * (const Matx<_Tp, 2, 2>& a, const Point_<_Tp>& b)
 {
@@ -668,14 +676,23 @@ Matx<_Tp, 4, 1> operator * (const Matx<_Tp, 4, 4>& a, const Point3_<_Tp>& b)
    return a*Matx<_Tp, 4, 1>(b.x, b.y, b.z, 1);
 }    

-    
+
 template<typename _Tp> static inline
 Scalar operator * (const Matx<_Tp, 4, 4>& a, const Scalar& b)
 {
-    return Scalar(a*Matx<_Tp, 4, 1>(b[0],b[1],b[2],b[3]));
-}    
-    
+    Matx<double, 4, 1> c(Matx<double, 4, 4>(a), b, Matx_MatMulOp());
+    return reinterpret_cast<const Scalar&>(c);
+}

+    
+static inline
+Scalar operator * (const Matx<double, 4, 4>& a, const Scalar& b) // non-template overload for a double-precision 4x4 matrix
+{
+    Matx<double, 4, 1> c(a, b, Matx_MatMulOp()); // NOTE(review): b is passed where a Matx operand is expected - presumably Scalar converts to Matx<double, 4, 1>; confirm
+    return reinterpret_cast<const Scalar&>(c); // NOTE(review): assumes Scalar has the same layout as Matx<double, 4, 1> - confirm; returned by value
+}
+
+    
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n> Matx<_Tp, m, n>::mul(const Matx<_Tp, m, n>& a) const
 {
--- a/modules/core/include/opencv2/core/types_c.h
+++ b/modules/core/include/opencv2/core/types_c.h
@@ -317,16 +317,19 @@ CV_INLINE  int  cvRound( double value )
 #endif
 }

+#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
+#include "emmintrin.h"
+#endif

 CV_INLINE  int  cvFloor( double value )
 {
-#ifdef __GNUC__
-    int i = (int)value;
-    return i - (i > value);
-#elif defined _MSC_VER && defined _M_X64
+#if defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)
    __m128d t = _mm_set_sd( value );
    int i = _mm_cvtsd_si32(t);
    return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
+#elif defined __GNUC__
+    int i = (int)value;
+    return i - (i > value);
 #else
    int i = cvRound(value);
    Cv32suf diff;
@@ -338,13 +341,13 @@ CV_INLINE  int  cvFloor( double value )

 CV_INLINE  int  cvCeil( double value )
 {
-#ifdef __GNUC__
-    int i = (int)value;
-    return i + (i < value);
-#elif defined _MSC_VER && defined _M_X64
+#if defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)
    __m128d t = _mm_set_sd( value );
    int i = _mm_cvtsd_si32(t);
    return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
+#elif defined __GNUC__
+    int i = (int)value;
+    return i + (i < value);
 #else
    int i = cvRound(value);
    Cv32suf diff;
--- a/modules/core/src/algorithm.cpp
+++ b/modules/core/src/algorithm.cpp
@@ -251,6 +251,41 @@ void Algorithm::set(const char* name, const Ptr<Algorithm>& value)
    info()->set(this, name, ParamType<Algorithm>::type, &value);
 }
    
+int Algorithm::getInt(const string& name) const
+{
+    return get<int>(name);
+}
+    
+double Algorithm::getDouble(const string& name) const
+{
+    return get<double>(name);
+}
+
+bool Algorithm::getBool(const string& name) const
+{
+    return get<bool>(name);
+}
+
+string Algorithm::getString(const string& name) const
+{
+    return get<string>(name);
+}
+
+Mat Algorithm::getMat(const string& name) const
+{
+    return get<Mat>(name);
+}
+
+vector<Mat> Algorithm::getMatVector(const string& name) const
+{
+    return get<vector<Mat> >(name);
+}
+
+Ptr<Algorithm> Algorithm::getAlgorithm(const string& name) const
+{
+    return get<Algorithm>(name);
+}
+    
 string Algorithm::paramHelp(const string& name) const
 {
    return info()->paramHelp(name.c_str());
@@ -296,9 +331,9 @@ AlgorithmInfo::~AlgorithmInfo()
    
 void AlgorithmInfo::write(const Algorithm* algo, FileStorage& fs) const
 {
-    size_t i = 0, n = data->params.vec.size();
+    size_t i = 0, nparams = data->params.vec.size();
    cv::write(fs, "name", algo->name());
-    for( i = 0; i < n; i++ )
+    for( i = 0; i < nparams; i++ )
    {
        const Param& p = data->params.vec[i].second;
        const string& pname = data->params.vec[i].first;
@@ -327,9 +362,10 @@ void AlgorithmInfo::write(const Algorithm* algo, FileStorage& fs) const

 void AlgorithmInfo::read(Algorithm* algo, const FileNode& fn) const
 {
-    size_t i = 0, n = data->params.vec.size();
+    size_t i = 0, nparams = data->params.vec.size();
+    AlgorithmInfo* info = algo->info();
    
-    for( i = 0; i < n; i++ )
+    for( i = 0; i < nparams; i++ )
    {
        const Param& p = data->params.vec[i].second;
        const string& pname = data->params.vec[i].first;
@@ -337,31 +373,43 @@ void AlgorithmInfo::read(Algorithm* algo, const FileNode& fn) const
        if( n.empty() )
            continue;
        if( p.type == Param::INT )
-            algo->set(pname, (int)n);
+        {
+            int val = (int)n;
+            info->set(algo, pname.c_str(), p.type, &val, true);
+        }
        else if( p.type == Param::BOOLEAN )
-            algo->set(pname, (int)n != 0);
+        {
+            bool val = (int)n != 0;
+            info->set(algo, pname.c_str(), p.type, &val, true);
+        }
        else if( p.type == Param::REAL )
-            algo->set(pname, (double)n);
+        {
+            double val = (double)n;
+            info->set(algo, pname.c_str(), p.type, &val, true);
+        }
        else if( p.type == Param::STRING )
-            algo->set(pname, (string)n);
+        {
+            string val = (string)n;
+            info->set(algo, pname.c_str(), p.type, &val, true);
+        }
        else if( p.type == Param::MAT )
        {
            Mat m;
            cv::read(n, m);
-            algo->set(pname, m);
+            info->set(algo, pname.c_str(), p.type, &m, true);
        }
        else if( p.type == Param::MAT_VECTOR )
        {
            vector<Mat> mv;
            cv::read(n, mv);
-            algo->set(pname, mv);
+            info->set(algo, pname.c_str(), p.type, &mv, true);
        }
        else if( p.type == Param::ALGORITHM )
        {
            Ptr<Algorithm> nestedAlgo = Algorithm::_create((string)n["name"]);
            CV_Assert( !nestedAlgo.empty() );
            nestedAlgo->read(n);
-            algo->set(pname, nestedAlgo);
+            info->set(algo, pname.c_str(), p.type, &nestedAlgo, true);
        }
        else
            CV_Error( CV_StsUnsupportedFormat, "unknown/unsupported parameter type");
@@ -391,24 +439,24 @@ union GetSetParam
    void (Algorithm::*set_mat_vector)(const vector<Mat>&);
    void (Algorithm::*set_algo)(const Ptr<Algorithm>&);
 };
-    
-void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const void* value) const
+
+void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const void* value, bool force) const
 {
    const Param* p = findstr(data->params, name);
-    
+
    if( !p )
        CV_Error_( CV_StsBadArg, ("No parameter '%s' is found", name ? name : "<NULL>") );
-    
-    if( p->readonly )
+
+    if( !force && p->readonly )
        CV_Error_( CV_StsError, ("Parameter '%s' is readonly", name));
-    
+
    GetSetParam f;
    f.set_int = p->setter;
-    
+
    if( argType == Param::INT || argType == Param::BOOLEAN || argType == Param::REAL )
    {
        CV_Assert( p->type == Param::INT || p->type == Param::REAL || p->type == Param::BOOLEAN );
-        
+
        if( p->type == Param::INT )
        {
            int val = argType == Param::INT ? *(const int*)value :
@@ -443,7 +491,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const vo
    else if( argType == Param::STRING )
    {
        CV_Assert( p->type == Param::STRING );
-        
+
        const string& val = *(const string*)value;
        if( p->setter )
            (algo->*f.set_string)(val);
@@ -453,7 +501,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const vo
    else if( argType == Param::MAT )
    {
        CV_Assert( p->type == Param::MAT );
-        
+
        const Mat& val = *(const Mat*)value;
        if( p->setter )
            (algo->*f.set_mat)(val);
@@ -463,7 +511,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const vo
    else if( argType == Param::MAT_VECTOR )
    {
        CV_Assert( p->type == Param::MAT_VECTOR );
-        
+
        const vector<Mat>& val = *(const vector<Mat>*)value;
        if( p->setter )
            (algo->*f.set_mat_vector)(val);
@@ -473,7 +521,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* name, int argType, const vo
    else if( argType == Param::ALGORITHM )
    {
        CV_Assert( p->type == Param::ALGORITHM );
-        
+
        const Ptr<Algorithm>& val = *(const Ptr<Algorithm>*)value;
        if( p->setter )
            (algo->*f.set_algo)(val);
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@@ -2186,6 +2186,28 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
 }

    
+void patchNaNs( InputOutputArray _a, double _val ) // overwrites every NaN element of _a with _val; asserts CV_32F depth
+{
+    Mat a = _a.getMat();
+    CV_Assert( a.depth() == CV_32F );
+    // elements are scanned as raw 32-bit integers, hence the int* plane pointer handed to the iterator
+    const Mat* arrays[] = {&a, 0};
+    int* ptrs[1];
+    NAryMatIterator it(arrays, (uchar**)ptrs);
+    size_t len = it.size*a.channels(); // values visited per plane: it.size times the channel count
+    Cv32suf val;
+    val.f = (float)_val; // _val narrowed to float; val.i is what gets stored below (presumably the same 32 bits - Cv32suf is declared elsewhere)
+    // visit each plane the iterator yields; ptrs[0] is refreshed by ++it
+    for( size_t i = 0; i < it.nplanes; i++, ++it )
+    {
+        int* tptr = ptrs[0];
+        for( size_t j = 0; j < len; j++ )
+            if( (tptr[j] & 0x7fffffff) > 0x7f800000 ) // sign bit cleared; above 0x7f800000 means all-ones exponent with non-zero mantissa, i.e. an IEEE-754 single-precision NaN
+                tptr[j] = val.i;
+    }
+}
+
+    
 void exp(const float* src, float* dst, int n)
 {
    Exp_32f(src, dst, n);
--- a/modules/core/src/precomp.hpp
+++ b/modules/core/src/precomp.hpp
@@ -176,15 +176,6 @@ struct NoVec

 extern volatile bool USE_SSE2;

-typedef void (*BinaryFunc)(const uchar* src1, size_t step1,
-                           const uchar* src2, size_t step2,
-                           uchar* dst, size_t step, Size sz,
-                           void*);
-
-BinaryFunc getConvertFunc(int sdepth, int ddepth);
-BinaryFunc getConvertScaleFunc(int sdepth, int ddepth);
-BinaryFunc getCopyMaskFunc(size_t esz);
-
 enum { BLOCK_SIZE = 1024 };

 #ifdef HAVE_IPP
--- a/modules/core/src/rand.cpp
+++ b/modules/core/src/rand.cpp
@@ -48,6 +48,10 @@

 #include "precomp.hpp"

+#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
+#include "emmintrin.h"
+#endif
+
 namespace cv
 {

@@ -196,33 +200,54 @@ DEF_RANDI_FUNC(8s, schar)
 DEF_RANDI_FUNC(16u, ushort)
 DEF_RANDI_FUNC(16s, short)
 DEF_RANDI_FUNC(32s, int)
-    
+
 static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
 {
    uint64 temp = *state;
-    int i;
+    int i = 0;

-    for( i = 0; i <= len - 4; i += 4 )
+    for( ; i <= len - 4; i += 4 )
    {
-        float f0, f1;
+        float f[4];
+        f[0] = (float)(int)(temp = RNG_NEXT(temp));
+        f[1] = (float)(int)(temp = RNG_NEXT(temp));
+        f[2] = (float)(int)(temp = RNG_NEXT(temp));
+        f[3] = (float)(int)(temp = RNG_NEXT(temp));

-        temp = RNG_NEXT(temp);
-        f0 = (int)temp*p[i][0] + p[i][1];
-        temp = RNG_NEXT(temp);
-        f1 = (int)temp*p[i+1][0] + p[i+1][1];
-        arr[i] = f0; arr[i+1] = f1;
+        // handwritten SSE is required not for performance but for numerical stability!
+        // both 32-bit gcc and MSVC compilers tend to generate double precision SSE
+        // while 64-bit compilers generate single precision SIMD instructions,
+        // so manual vectorisation forces all compilers to use single precision
+#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
+        __m128 q0 = _mm_loadu_ps((const float*)(p + i));
+        __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));

-        temp = RNG_NEXT(temp);
-        f0 = (int)temp*p[i+2][0] + p[i+2][1];
-        temp = RNG_NEXT(temp);
-        f1 = (int)temp*p[i+3][0] + p[i+3][1];
-        arr[i+2] = f0; arr[i+3] = f1;
+        __m128 q01l = _mm_unpacklo_ps(q0, q1);
+        __m128 q01h = _mm_unpackhi_ps(q0, q1);
+
+        __m128 p0 = _mm_unpacklo_ps(q01l, q01h);
+        __m128 p1 = _mm_unpackhi_ps(q01l, q01h);
+
+        _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
+#else
+        arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
+        arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
+        arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
+        arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
+#endif
    }

    for( ; i < len; i++ )
    {
        temp = RNG_NEXT(temp);
+#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
+        _mm_store_ss(arr + i, _mm_add_ss(
+                _mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
+                _mm_set_ss(p[i][1]))
+                );
+#else
        arr[i] = (int)temp*p[i][0] + p[i][1];
+#endif
    }

    *state = temp;
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -834,7 +834,6 @@ float normL2Sqr_(const float* a, const float* b, int n)
    }
    else
 #endif
-    //vz why do we need unroll here? no sse = no need to unroll
 	{
        for( ; j <= n - 4; j += 4 )
        {
@@ -875,7 +874,6 @@ float normL1_(const float* a, const float* b, int n)
    }
    else
 #endif
-     //vz no need to unroll here - if no sse
    {
        for( ; j <= n - 4; j += 4 )
        {
@@ -916,7 +914,6 @@ int normL1_(const uchar* a, const uchar* b, int n)
    }
    else
 #endif
-     //vz why do we need unroll here? no sse = no unroll
    {
        for( ; j <= n - 4; j += 4 )
        {
@@ -965,6 +962,34 @@ static const uchar popCountTable4[] =
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 };

+int normHamming(const uchar* a, int n) // single-array variant: sums popCountTable[a[i]] over the n bytes of a
+{
+    int i = 0, result = 0;
+#if CV_NEON
+    if (CPU_HAS_NEON_FEATURE)
+    {
+        uint32x4_t bits = vmovq_n_u32(0); // four 32-bit running totals
+        for (; i <= n - 16; i += 16) { // 16 bytes per iteration
+            uint8x16_t A_vec = vld1q_u8 (a + i);
+            uint8x16_t bitsSet = vcntq_u8 (A_vec); // per-byte count of set bits
+            uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); // pairwise widening add: 16 x u8 -> 8 x u16
+            uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); // pairwise widening add: 8 x u16 -> 4 x u32
+            bits = vaddq_u32(bits, bitSet4);
+        }
+        uint64x2_t bitSet2 = vpaddlq_u32 (bits); // fold the four totals into two 64-bit sums
+        result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); // 32-bit lane 0 of the reinterpreted vector (taken as the first sum)
+        result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); // 32-bit lane 2 (taken as the second sum)
+    }
+    else // when CV_NEON is on, the unrolled loop below is this else-branch; otherwise it always runs
+#endif
+        for( ; i <= n - 4; i += 4 ) // table lookup, four bytes per iteration
+            result += popCountTable[a[i]] + popCountTable[a[i+1]] +
+            popCountTable[a[i+2]] + popCountTable[a[i+3]];
+    for( ; i < n; i++ ) // remaining bytes; also the tail left over by the NEON loop
+        result += popCountTable[a[i]];
+    return result;
+}
+    
 int normHamming(const uchar* a, const uchar* b, int n)
 {
    int i = 0, result = 0;
@@ -995,6 +1020,27 @@ int normHamming(const uchar* a, const uchar* b, int n)
    return result;
 }

+int normHamming(const uchar* a, int n, int cellSize) // single-array variant with a selectable cell size (1, 2 or 4)
+{
+    if( cellSize == 1 ) // cell size 1 is handled by the two-argument overload
+        return normHamming(a, n);
+    const uchar* tab = 0; // per-byte lookup table chosen by cellSize
+    if( cellSize == 2 )
+        tab = popCountTable2;
+    else if( cellSize == 4 )
+        tab = popCountTable4;
+    else // any other cell size is reported through CV_Error
+        CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
+    int i = 0, result = 0;
+#if CV_ENABLE_UNROLLED
+    for( ; i <= n - 4; i += 4 ) // optional 4x unrolled pass, compiled only when CV_ENABLE_UNROLLED is non-zero
+        result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
+#endif
+    for( ; i < n; i++ ) // remaining bytes, or all of them when unrolling is disabled
+        result += tab[a[i]];
+    return result;
+}    
+    
 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
 {
    if( cellSize == 1 )
@@ -1221,38 +1267,80 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
    int depth = src.depth(), cn = src.channels();
    
    normType &= 7;
-    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
+    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
+               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
    
-    if( depth == CV_32F && src.isContinuous() && mask.empty() )
+    if( src.isContinuous() && mask.empty() )
    {
        size_t len = src.total()*cn;
        if( len == (size_t)(int)len )
        {
-            const float* data = src.ptr<float>();
-            
-            if( normType == NORM_L2 )
+            if( depth == CV_32F )
            {
-                double result = 0;
-                GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
-                return std::sqrt(result);
+                const float* data = src.ptr<float>();
+                
+                if( normType == NORM_L2 )
+                {
+                    double result = 0;
+                    GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
+                    return std::sqrt(result);
+                }
+                if( normType == NORM_L2SQR )
+                {
+                    double result = 0;
+                    GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
+                    return result;
+                }
+                if( normType == NORM_L1 )
+                {
+                    double result = 0;
+                    GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
+                    return result;
+                }
+                if( normType == NORM_INF )
+                {
+                    float result = 0;
+                    GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
+                    return result;
+                }
            }
-            if( normType == NORM_L1 )
+            if( depth == CV_8U )
            {
-                double result = 0;
-                GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
-                return result;
-            }
-            {
-                float result = 0;
-                GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
-                return result;
-
+                const uchar* data = src.ptr<uchar>();
+                
+                if( normType == NORM_HAMMING )
+                    return normHamming(data, (int)len);
+                
+                if( normType == NORM_HAMMING2 )
+                    return normHamming(data, (int)len, 2);
            }
        }
    }
    
    CV_Assert( mask.empty() || mask.type() == CV_8U );
    
+    if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
+    {
+        if( !mask.empty() )
+        {
+            Mat temp;
+            bitwise_and(src, mask, temp);
+            return norm(temp, normType);
+        }
+        int cellSize = normType == NORM_HAMMING ? 1 : 2;
+        
+        const Mat* arrays[] = {&src, 0};
+        uchar* ptrs[1];
+        NAryMatIterator it(arrays, ptrs);
+        int total = (int)it.size;
+        int result = 0;
+        
+        for( size_t i = 0; i < it.nplanes; i++, ++it )
+            result += normHamming(ptrs[0], total, cellSize);
+        
+        return result;
+    }
+    
    NormFunc func = normTab[normType >> 1][depth];
    CV_Assert( func != 0 );
    
@@ -1269,7 +1357,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
    NAryMatIterator it(arrays, ptrs);
    int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
    bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
-            (normType == NORM_L2 && depth <= CV_8S);
+            ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
    int isum = 0;
    int *ibuf = &result.i;
    size_t esz = 0;
@@ -1328,38 +1416,72 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
    CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
    
    normType &= 7;
-    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
+    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
+              ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
    
-    if( src1.depth() == CV_32F && src1.isContinuous() && src2.isContinuous() && mask.empty() )
+    if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
    {
        size_t len = src1.total()*src1.channels();
        if( len == (size_t)(int)len )
        {
-            const float* data1 = src1.ptr<float>();
-            const float* data2 = src2.ptr<float>();
-            
-            if( normType == NORM_L2 )
+            if( src1.depth() == CV_32F )
            {
-                double result = 0;
-                GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
-                return std::sqrt(result);
-            }
-            if( normType == NORM_L1 )
-            {
-                double result = 0;
-                GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
-                return result;
-            }
-            {
-                float result = 0;
-                GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
-                return result;
+                const float* data1 = src1.ptr<float>();
+                const float* data2 = src2.ptr<float>();
+                
+                if( normType == NORM_L2 )
+                {
+                    double result = 0;
+                    GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
+                    return std::sqrt(result);
+                }
+                if( normType == NORM_L2SQR )
+                {
+                    double result = 0;
+                    GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
+                    return result;
+                }
+                if( normType == NORM_L1 )
+                {
+                    double result = 0;
+                    GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
+                    return result;
+                }
+                if( normType == NORM_INF )
+                {
+                    float result = 0;
+                    GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
+                    return result;
+                }
            }
        }
    }
    
    CV_Assert( mask.empty() || mask.type() == CV_8U );
    
+    if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
+    {
+        if( !mask.empty() )
+        {
+            Mat temp;
+            bitwise_xor(src1, src2, temp);
+            bitwise_and(temp, mask, temp);
+            return norm(temp, normType);
+        }
+        int cellSize = normType == NORM_HAMMING ? 1 : 2;
+        
+        const Mat* arrays[] = {&src1, &src2, 0};
+        uchar* ptrs[2];
+        NAryMatIterator it(arrays, ptrs);
+        int total = (int)it.size;
+        int result = 0;
+        
+        for( size_t i = 0; i < it.nplanes; i++, ++it )
+            result += normHamming(ptrs[0], ptrs[1], total, cellSize);
+        
+        return result;
+    }
+    
    NormDiffFunc func = normDiffTab[normType >> 1][depth];
    CV_Assert( func != 0 );
    
@@ -1377,7 +1499,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
    NAryMatIterator it(arrays, ptrs);
    int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
    bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
-            (normType == NORM_L2 && depth <= CV_8S);
+            ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
    unsigned isum = 0;
    unsigned *ibuf = &result.u;
    size_t esz = 0;
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -1186,8 +1186,12 @@ struct CountNonZeroOp : public BaseElemWiseOp
    
 struct MeanStdDevOp : public BaseElemWiseOp
 {
+    Scalar sqmeanRef;
+    int cn;
+
    MeanStdDevOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
    {
+        cn = 0; // no channel count recorded yet; op() stores it and getMaxErr() asserts cn > 0
        context = 7;
    };
    void op(const vector<Mat>& src, Mat& dst, const Mat& mask)
@@ -1202,6 +1206,9 @@ struct MeanStdDevOp : public BaseElemWiseOp
        cvtest::multiply(temp, temp, temp);
        Scalar mean = cvtest::mean(src[0], mask);
        Scalar sqmean = cvtest::mean(temp, mask);
+        
+        sqmeanRef = sqmean;
+        cn = temp.channels();

        for( int c = 0; c < 4; c++ )
            sqmean[c] = std::sqrt(std::max(sqmean[c] - mean[c]*mean[c], 0.)); 
@@ -1212,7 +1219,11 @@ struct MeanStdDevOp : public BaseElemWiseOp
    }
    double getMaxErr(int)
    {
-        return 1e-6;
+        CV_Assert(cn > 0); // cn stays 0 until op() has recorded the channel count
+        double err = sqmeanRef[0]; // sqmeanRef is the sqmean Scalar saved by op() before it is turned into a standard deviation
+        for(int i = 1; i < cn; ++i) // only the first cn channels are considered
+            err = std::max(err, sqmeanRef[i]);
+        return 3e-7 * err; // tolerance now scales with the largest saved channel value instead of the fixed 1e-6
    }
 };    

@@ -1226,7 +1237,20 @@ struct NormOp : public BaseElemWiseOp
    };
    int getRandomType(RNG& rng)
    {
-        return cvtest::randomType(rng, DEPTH_MASK_ALL_BUT_8S, 1, 4);
+        int type = cvtest::randomType(rng, DEPTH_MASK_ALL_BUT_8S, 1, 4); // random type, possibly overridden below
+        for(;;) // redraw until normType is one of the six norm kinds accepted below
+        {
+            normType = rng.uniform(1, 8); // side effect: normType is chosen here; generateScalars() no longer sets it
+            if( normType == NORM_INF || normType == NORM_L1 ||
+                normType == NORM_L2 || normType == NORM_L2SQR ||
+                normType == NORM_HAMMING || normType == NORM_HAMMING2 )
+                break;
+        }
+        if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
+        {
+            type = CV_8U; // Hamming norms replace the random type with CV_8U; presumably they are only supported on 8-bit data -- TODO confirm
+        }
+        return type;
    }
    void op(const vector<Mat>& src, Mat& dst, const Mat& mask)
    {
@@ -1242,7 +1266,6 @@ struct NormOp : public BaseElemWiseOp
    }
    void generateScalars(int, RNG& rng)
    {
-        normType = 1 << rng.uniform(0, 3);
    }
    double getMaxErr(int)
    {
--- a/modules/core/test/test_eigen.cpp
+++ b/modules/core/test/test_eigen.cpp
@@ -79,10 +79,12 @@ protected:
    bool check_full(int type);													// compex test for symmetric matrix
    virtual void run (int) = 0;													// main testing method

-private:
+protected:

    float eps_val_32, eps_vec_32;
    float eps_val_64, eps_vec_64;
+    int ntests;
+    
    bool check_pair_count(const cv::Mat& src, const cv::Mat& evalues, int low_index = -1, int high_index = -1);
    bool check_pair_count(const cv::Mat& src, const cv::Mat& evalues, const cv::Mat& evectors, int low_index = -1, int high_index = -1);
    bool check_pairs_order(const cv::Mat& eigen_values);											// checking order of eigen values & vectors (it should be none up)
@@ -140,8 +142,7 @@ Core_EigenTest_Scalar_64::~Core_EigenTest_Scalar_64() {}

 void Core_EigenTest_Scalar_32::run(int) 
 {
-    const size_t MATRIX_COUNT = 500;
-    for (size_t i = 0; i < MATRIX_COUNT; ++i)
+    for (int i = 0; i < ntests; ++i)
    {
        float value = cv::randu<float>();
        cv::Mat src(1, 1, CV_32FC1, Scalar::all((float)value));
@@ -151,8 +152,7 @@ void Core_EigenTest_Scalar_32::run(int)

 void Core_EigenTest_Scalar_64::run(int)
 {
-    const size_t MATRIX_COUNT = 500;
-    for (size_t i = 0; i < MATRIX_COUNT; ++i)
+    for (int i = 0; i < ntests; ++i)
    {
        float value = cv::randu<float>();
        cv::Mat src(1, 1, CV_64FC1, Scalar::all((double)value));
@@ -163,7 +163,9 @@ void Core_EigenTest_Scalar_64::run(int)
 void Core_EigenTest_32::run(int) { check_full(CV_32FC1); }
 void Core_EigenTest_64::run(int) { check_full(CV_64FC1); }

-Core_EigenTest::Core_EigenTest() : eps_val_32(1e-3f), eps_vec_32(1e-2f), eps_val_64(1e-4f), eps_vec_64(1e-3f) {}
+Core_EigenTest::Core_EigenTest()
+: eps_val_32(1e-3f), eps_vec_32(1e-2f),
+  eps_val_64(1e-4f), eps_vec_64(1e-3f), ntests(100) {} // ntests bounds the loops in the run() overrides and check_full(), which previously used MATRIX_COUNT = 500
 Core_EigenTest::~Core_EigenTest() {}

 bool Core_EigenTest::check_pair_count(const cv::Mat& src, const cv::Mat& evalues, int low_index, int high_index)
@@ -382,14 +384,13 @@ bool Core_EigenTest::test_values(const cv::Mat& src)

 bool Core_EigenTest::check_full(int type)
 {
-    const int MATRIX_COUNT = 500;
    const int MAX_DEGREE = 7;

    srand((unsigned int)time(0));

-    for (int i = 1; i <= MATRIX_COUNT; ++i)
+    for (int i = 0; i < ntests; ++i)
    {
-        int src_size = (int)(std::pow(2.0, (rand()%MAX_DEGREE+1)*1.0));
+        int src_size = (int)(std::pow(2.0, (rand()%MAX_DEGREE)+1.));

        cv::Mat src(src_size, src_size, type);

--- a/modules/core/test/test_math.cpp
+++ b/modules/core/test/test_math.cpp
@@ -1802,6 +1802,7 @@ Core_MatrixTest( 1, 4, false, false, 1 ),
 flags(0), have_u(false), have_v(false), symmetric(false), compact(false), vector_w(false)
 {
    test_case_count = 100;
+    max_log_array_size = 8;
    test_array[TEMP].push_back(NULL);
    test_array[TEMP].push_back(NULL);
    test_array[TEMP].push_back(NULL);
--- a/modules/core/test/test_operations.cpp
+++ b/modules/core/test/test_operations.cpp
@@ -74,11 +74,17 @@ protected:
    bool TestSparseMat();
    bool TestVec();
    bool TestMatxMultiplication();
+    bool TestSubMatAccess();
    bool operations1();

-    void checkDiff(const Mat& m1, const Mat& m2, const string& s) { if (norm(m1, m2, NORM_INF) != 0) throw test_excep(s); }
-    void checkDiffF(const Mat& m1, const Mat& m2, const string& s) { if (norm(m1, m2, NORM_INF) > 1e-5) throw test_excep(s); }
-
+    void checkDiff(const Mat& m1, const Mat& m2, const string& s) // exact check: throws test_excep(s) unless norm(m1, m2, NORM_INF) is exactly 0
+    {
+        if (norm(m1, m2, NORM_INF) != 0) throw test_excep(s);
+    }
+    void checkDiffF(const Mat& m1, const Mat& m2, const string& s) // tolerant check: throws test_excep(s) when norm(m1, m2, NORM_INF) exceeds 1e-5
+    {
+        if (norm(m1, m2, NORM_INF) > 1e-5) throw test_excep(s);
+    }
 };

 CV_OperationsTest::CV_OperationsTest()
@@ -438,6 +444,41 @@ bool CV_OperationsTest::SomeMatFunctions()
 }


+bool CV_OperationsTest::TestSubMatAccess() // fills a 4x4 Mat_<float> through sub-matrix views, then checks linear indexing against reshape(1,1); returns false after logging a test_excep
+{
+    try
+    {
+        Mat_<float> T_bs(4,4);
+        Vec3f cdir(1.f, 1.f, 0.f);
+        Vec3f ydir(1.f, 0.f, 1.f);
+        Vec3f fpt(0.1f, 0.7f, 0.2f);
+        T_bs.setTo(0);
+        T_bs(Range(0,3),Range(2,3)) = 1.0*Mat(cdir); // weird OpenCV stuff: the multiply is needed; presumably it turns this into a matrix-expression assignment that writes into the view -- TODO confirm
+        T_bs(Range(0,3),Range(1,2)) = 1.0*Mat(ydir); // rows 0..2 of column 1
+        T_bs(Range(0,3),Range(0,1)) = 1.0*Mat(cdir.cross(ydir)); // rows 0..2 of column 0
+        T_bs(Range(0,3),Range(3,4)) = 1.0*Mat(fpt); // rows 0..2 of column 3
+        T_bs(3,3) = 1.0;
+        //std::cout << "[Nav Grok] S frame =" << std::endl << T_bs << std::endl;
+
+        // set up display coords, really just the S frame
+        std::vector<float>coords;
+        
+        for (int i=0; i<16; i++) // copy all 16 elements using single-index access
+        {
+            coords.push_back(T_bs(i));
+            //std::cout << T_bs1(i) << std::endl;
+        }
+        CV_Assert( norm(coords, T_bs.reshape(1,1), NORM_INF) == 0 ); // NOTE(review): a failing CV_Assert presumably raises cv::Exception, which the test_excep handler below would not catch -- confirm
+    }
+    catch (const test_excep& e) // log the message carried by the exception and mark the test as FAIL_MISMATCH
+    {
+        ts->printf(cvtest::TS::LOG, "%s\n", e.s.c_str());
+        ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
+        return false;
+    }
+    return true;
+}
+
 bool CV_OperationsTest::TestTemplateMat()
 {  
    try
@@ -754,12 +795,35 @@ bool CV_OperationsTest::TestMatxMultiplication()
 { 
    try 
    { 
-        Matx33f mat(1, 0, 0, 0, 1, 0, 0, 0, 1); // Identity matrix 
+        Matx33f mat(1, 1, 1, 0, 1, 1, 0, 0, 1); // Upper-triangular matrix of ones (no longer the identity) 
        Point2f pt(3, 4); 
        Point3f res = mat * pt; // Correctly assumes homogeneous coordinates 
-        if(res.x != 3.0) throw test_excep(); 
-        if(res.y != 4.0) throw test_excep(); 
-        if(res.z != 1.0) throw test_excep(); 
+        
+        Vec3f res2 = mat*Vec3f(res.x, res.y, res.z);
+        
+        if(res.x != 8.0) throw test_excep(); 
+        if(res.y != 5.0) throw test_excep(); 
+        if(res.z != 1.0) throw test_excep();
+        
+        if(res2[0] != 14.0) throw test_excep(); 
+        if(res2[1] != 6.0) throw test_excep(); 
+        if(res2[2] != 1.0) throw test_excep();
+        
+        Matx44f mat44f(1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1);
+        Matx44d mat44d(1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1);
+        Scalar s(4, 3, 2, 1);
+        Scalar sf = mat44f*s;
+        Scalar sd = mat44d*s;
+        
+        if(sf[0] != 10.0) throw test_excep(); 
+        if(sf[1] != 6.0) throw test_excep(); 
+        if(sf[2] != 3.0) throw test_excep();
+        if(sf[3] != 1.0) throw test_excep();
+        
+        if(sd[0] != 10.0) throw test_excep(); 
+        if(sd[1] != 6.0) throw test_excep(); 
+        if(sd[2] != 3.0) throw test_excep();
+        if(sd[3] != 1.0) throw test_excep();
    } 
    catch(const test_excep&) 
    { 
@@ -877,6 +941,9 @@ void CV_OperationsTest::run( int /* start_from */)
    
    if (!TestMatxMultiplication())
        return;
+    
+    if (!TestSubMatAccess())
+        return;

    if (!operations1())
        return;