Merge remote-tracking branch 'upstream/master'

2013-09-19 23:58:19 +10:00
parent 0e06465a23 0ecd7913f8
commit 8e8ff4dd33
140 changed files with 20358 additions and 345 deletions
--- a/modules/bioinspired/src/opencl/retina_kernel.cl
+++ b/modules/bioinspired/src/opencl/retina_kernel.cl
@@ -43,6 +43,9 @@
 //
 //M*/

+// data (which is float) is aligned to 32 bytes
+#define WIDTH_MULTIPLE (32 >> 2)
+
 /////////////////////////////////////////////////////////
 //*******************************************************
 // basicretinafilter
@@ -116,22 +119,18 @@ kernel void horizontalAnticausalFilter(

    float4 result_v4 = (float4)(0), out_v4;
    float result = 0;
-    // we assume elements_per_row is multple of 4
-    for(int i = 0; i < 4; ++ i, -- optr)
+    // we assume elements_per_row is a multiple of WIDTH_MULTIPLE
+    for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr)
    {
-        if(i < elements_per_row - cols)
-        {
-            *optr = result;
-        }
-        else
+        if(i >= elements_per_row - cols)
        {
            result = *optr + _a * result;
-            *optr = result;
        }
+        *optr = result;
    }
    result_v4.x = result;
    optr -= 3;
-    for(int i = 1; i < elements_per_row / 4; ++i, optr -= 4)
+    for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4)
    {
        // shift left, `offset` is type `size_t` so it cannot be negative
        out_v4 = vload4(0, optr);
@@ -223,23 +222,19 @@ kernel void horizontalAnticausalFilter_Irregular(

    float4 buf_v4, out_v4, res_v4 = (float4)(0);
    float result = 0;
-    // we assume elements_per_row is multple of 4
-    for(int i = 0; i < 4; ++ i, -- optr, -- bptr)
+    // we assume elements_per_row is a multiple of WIDTH_MULTIPLE
+    for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr, -- bptr)
    {
-        if(i < elements_per_row - cols)
-        {
-            *optr = result;
-        }
-        else
+        if(i >= elements_per_row - cols)
        {
            result = *optr + *bptr * result;
-            *optr = result;
        }
+        *optr = result;
    }
    res_v4.x = result;
    optr -= 3;
    bptr -= 3;
-    for(int i = 0; i < elements_per_row / 4 - 1; ++i, optr -= 4, bptr -= 4)
+    for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4, bptr -= 4)
    {
        buf_v4 = vload4(0, bptr);
        out_v4 = vload4(0, optr);
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@ -262,16 +262,16 @@ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2,
                                   double param1 = 3., double param2 = 0.99 );

 //! finds essential matrix from a set of corresponding 2D points using five-point algorithm
-CV_EXPORTS Mat findEssentialMat( InputArray points1, InputArray points2,
+CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2,
                                 double focal = 1.0, Point2d pp = Point2d(0, 0),
                                 int method = RANSAC, double prob = 0.999,
                                 double threshold = 1.0, OutputArray mask = noArray() );

 //! decompose essential matrix to possible rotation matrix and one translation vector
-CV_EXPORTS void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t );
+CV_EXPORTS_W void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t );

 //! recover relative camera pose from a set of corresponding 2D points
-CV_EXPORTS int recoverPose( InputArray E, InputArray points1, InputArray points2,
+CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2,
                            OutputArray R, OutputArray t,
                            double focal = 1.0, Point2d pp = Point2d(0, 0),
                            InputOutputArray mask = noArray() );
--- a/modules/calib3d/src/calibration.cpp
+++ b/modules/calib3d/src/calibration.cpp
@@ -1403,6 +1403,8 @@ CV_IMPL double cvCalibrateCamera2( const CvMat* objectPoints,
    }
    if( !(flags & CV_CALIB_RATIONAL_MODEL) )
        flags |= CV_CALIB_FIX_K4 + CV_CALIB_FIX_K5 + CV_CALIB_FIX_K6;
+    if( !(flags & CV_CALIB_THIN_PRISM_MODEL))
+        flags |= CALIB_FIX_S1_S2_S3_S4;
    if( flags & CV_CALIB_FIX_K1 )
        mask[4] = 0;
    if( flags & CV_CALIB_FIX_K2 )
@@ -1415,8 +1417,6 @@ CV_IMPL double cvCalibrateCamera2( const CvMat* objectPoints,
        mask[10] = 0;
    if( flags & CV_CALIB_FIX_K6 )
        mask[11] = 0;
-    if(!(flags & CV_CALIB_THIN_PRISM_MODEL))
-        flags |= CALIB_FIX_S1_S2_S3_S4;

    if(flags & CALIB_FIX_S1_S2_S3_S4)
    {
@@ -1638,12 +1638,12 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
                        CvTermCriteria termCrit,
                        int flags )
 {
-    const int NINTRINSIC = 12;
+    const int NINTRINSIC = 16;
    Ptr<CvMat> npoints, err, J_LR, Je, Ji, imagePoints[2], objectPoints, RT0;
    CvLevMarq solver;
    double reprojErr = 0;

-    double A[2][9], dk[2][8]={{0,0,0,0,0,0,0,0},{0,0,0,0,0,0,0,0}}, rlr[9];
+    double A[2][9], dk[2][12]={{0,0,0,0,0,0,0,0,0,0,0,0},{0,0,0,0,0,0,0,0,0,0,0,0}}, rlr[9];
    CvMat K[2], Dist[2], om_LR, T_LR;
    CvMat R_LR = cvMat(3, 3, CV_64F, rlr);
    int i, k, p, ni = 0, ofs, nimages, pointsTotal, maxPoints = 0;
@@ -1689,7 +1689,7 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
                (_imagePoints1->rows == 1 && _imagePoints1->cols == pointsTotal && cn == 2)) );

        K[k] = cvMat(3,3,CV_64F,A[k]);
-        Dist[k] = cvMat(1,8,CV_64F,dk[k]);
+        Dist[k] = cvMat(1,12,CV_64F,dk[k]);

        imagePoints[k].reset(cvCreateMat( points->rows, points->cols, CV_64FC(CV_MAT_CN(points->type))));
        cvConvert( points, imagePoints[k] );
@@ -1748,6 +1748,8 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
        uchar* imask = solver.mask->data.ptr + nparams - NINTRINSIC*2;
        if( !(flags & CV_CALIB_RATIONAL_MODEL) )
            flags |= CV_CALIB_FIX_K4 | CV_CALIB_FIX_K5 | CV_CALIB_FIX_K6;
+        if( !(flags & CV_CALIB_THIN_PRISM_MODEL) )
+            flags |= CV_CALIB_FIX_S1_S2_S3_S4;
        if( flags & CV_CALIB_FIX_ASPECT_RATIO )
            imask[0] = imask[NINTRINSIC] = 0;
        if( flags & CV_CALIB_FIX_FOCAL_LENGTH )
@@ -1768,6 +1770,13 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
            imask[10] = imask[NINTRINSIC+10] = 0;
        if( flags & CV_CALIB_FIX_K6 )
            imask[11] = imask[NINTRINSIC+11] = 0;
+        if( flags & CV_CALIB_FIX_S1_S2_S3_S4 )
+        {
+            imask[12] = imask[NINTRINSIC+12] = 0;
+            imask[13] = imask[NINTRINSIC+13] = 0;
+            imask[14] = imask[NINTRINSIC+14] = 0;
+            imask[15] = imask[NINTRINSIC+15] = 0;
+        }
    }

    /*
@@ -1842,6 +1851,10 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
            iparam[4] = dk[k][0]; iparam[5] = dk[k][1]; iparam[6] = dk[k][2];
            iparam[7] = dk[k][3]; iparam[8] = dk[k][4]; iparam[9] = dk[k][5];
            iparam[10] = dk[k][6]; iparam[11] = dk[k][7];
+            iparam[12] = dk[k][8];
+            iparam[13] = dk[k][9];
+            iparam[14] = dk[k][10];
+            iparam[15] = dk[k][11];
        }

    om_LR = cvMat(3, 1, CV_64F, solver.param->data.db);
@@ -1908,6 +1921,10 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1
                dk[k][5] = iparam[k*NINTRINSIC+9];
                dk[k][6] = iparam[k*NINTRINSIC+10];
                dk[k][7] = iparam[k*NINTRINSIC+11];
+                dk[k][8] = iparam[k*NINTRINSIC+12];
+                dk[k][9] = iparam[k*NINTRINSIC+13];
+                dk[k][10] = iparam[k*NINTRINSIC+14];
+                dk[k][11] = iparam[k*NINTRINSIC+15];
            }
        }

@@ -3009,6 +3026,7 @@ static Mat prepareDistCoeffs(Mat& distCoeffs0, int rtype)
    if( distCoeffs0.size() == Size(1, 4) ||
       distCoeffs0.size() == Size(1, 5) ||
       distCoeffs0.size() == Size(1, 8) ||
+       distCoeffs0.size() == Size(1, 12) ||
       distCoeffs0.size() == Size(4, 1) ||
       distCoeffs0.size() == Size(5, 1) ||
       distCoeffs0.size() == Size(8, 1) ||
--- a/modules/contrib/doc/facerec/facerec_api.rst
+++ b/modules/contrib/doc/facerec/facerec_api.rst
@@ -70,6 +70,8 @@ Moreover every :ocv:class:`FaceRecognizer` supports the:

 * **Loading/Saving** the model state from/to a given XML or YAML.

+.. note:: When using the FaceRecognizer interface in combination with Python, please stick to Python 2. Some underlying scripts like create_csv will not work in other versions, like Python 3.
+
 Setting the Thresholds
 +++++++++++++++++++++++

--- a/modules/core/include/opencv2/core/version.hpp
+++ b/modules/core/include/opencv2/core/version.hpp
@@ -48,10 +48,11 @@
 #ifndef __OPENCV_VERSION_HPP__
 #define __OPENCV_VERSION_HPP__

-#define CV_VERSION_EPOCH    2
-#define CV_VERSION_MAJOR    9
+#define CV_VERSION_EPOCH    3
+#define CV_VERSION_MAJOR    0
 #define CV_VERSION_MINOR    0
 #define CV_VERSION_REVISION 0
+#define CV_VERSION_STATUS   "-dev"

 #define CVAUX_STR_EXP(__A)  #__A
 #define CVAUX_STR(__A)      CVAUX_STR_EXP(__A)
@@ -60,9 +61,9 @@
 #define CVAUX_STRW(__A)      CVAUX_STRW_EXP(__A)

 #if CV_VERSION_REVISION
-#  define CV_VERSION        CVAUX_STR(CV_VERSION_EPOCH) "." CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION)
+#  define CV_VERSION        CVAUX_STR(CV_VERSION_EPOCH) "." CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS
 #else
-#  define CV_VERSION        CVAUX_STR(CV_VERSION_EPOCH) "." CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR)
+#  define CV_VERSION        CVAUX_STR(CV_VERSION_EPOCH) "." CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) CV_VERSION_STATUS
 #endif

 /* old  style version constants*/
--- a/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp
@@ -56,6 +56,7 @@ namespace grid_histogram_detail
    template <int BIN_COUNT, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
    __global__ void histogram(const SrcPtr src, ResType* hist, const MaskPtr mask, const int rows, const int cols)
    {
+    #if CV_CUDEV_ARCH >= 120
        __shared__ ResType smem[BIN_COUNT];

        const int y = blockIdx.x * blockDim.y + threadIdx.y;
@@ -86,6 +87,7 @@ namespace grid_histogram_detail
            if (histVal > 0)
                atomicAdd(hist + i, histVal);
        }
+    #endif
    }

    template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
--- a/modules/cudev/include/opencv2/cudev/grid/histogram.hpp
+++ b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp
@@ -57,6 +57,8 @@ namespace cv { namespace cudev {
 template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
 __host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
 {
+    CV_Assert( deviceSupports(SHARED_ATOMICS) );
+
    const int rows = getRows(src);
    const int cols = getCols(src);

@@ -75,6 +77,8 @@ __host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const Mas
 template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType>
 __host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
 {
+    CV_Assert( deviceSupports(SHARED_ATOMICS) );
+
    const int rows = getRows(src);
    const int cols = getCols(src);

--- a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
@@ -52,6 +52,40 @@
 #include "gpumat.hpp"
 #include "traits.hpp"

+namespace
+{
+    template <typename T> struct CvCudevTextureRef
+    {
+        typedef texture<T, cudaTextureType2D, cudaReadModeElementType> TexRef;
+
+        static TexRef ref;
+
+        __host__ static void bind(const cv::cudev::GlobPtrSz<T>& mat,
+                                  bool normalizedCoords = false,
+                                  cudaTextureFilterMode filterMode = cudaFilterModePoint,
+                                  cudaTextureAddressMode addressMode = cudaAddressModeClamp)
+        {
+            ref.normalized = normalizedCoords;
+            ref.filterMode = filterMode;
+            ref.addressMode[0] = addressMode;
+            ref.addressMode[1] = addressMode;
+            ref.addressMode[2] = addressMode;
+
+            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
+
+            CV_CUDEV_SAFE_CALL( cudaBindTexture2D(0, &ref, mat.data, &desc, mat.cols, mat.rows, mat.step) );
+        }
+
+        __host__ static void unbind()
+        {
+            CV_CUDEV_SAFE_CALL( cudaUnbindTexture(ref) );
+        }
+    };
+
+    template <typename T>
+    typename CvCudevTextureRef<T>::TexRef CvCudevTextureRef<T>::ref;
+}
+
 namespace cv { namespace cudev {

 template <typename T> struct TexturePtr
@@ -63,79 +97,73 @@ template <typename T> struct TexturePtr

    __device__ __forceinline__ T operator ()(float y, float x) const
    {
+    #if CV_CUDEV_ARCH < 300
+        // Use the texture reference
+        return tex2D(CvCudevTextureRef<T>::ref, x, y);
+    #else
+        // Use the texture object
        return tex2D<T>(texObj, x, y);
+    #endif
    }
 };

 template <typename T> struct Texture : TexturePtr<T>
 {
    int rows, cols;
+    bool cc30;

    __host__ explicit Texture(const GlobPtrSz<T>& mat,
                              bool normalizedCoords = false,
                              cudaTextureFilterMode filterMode = cudaFilterModePoint,
                              cudaTextureAddressMode addressMode = cudaAddressModeClamp)
    {
-        CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) );
+        cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);

        rows = mat.rows;
        cols = mat.cols;

-        cudaResourceDesc texRes;
-        std::memset(&texRes, 0, sizeof(texRes));
-        texRes.resType = cudaResourceTypePitch2D;
-        texRes.res.pitch2D.devPtr = mat.data;
-        texRes.res.pitch2D.height = mat.rows;
-        texRes.res.pitch2D.width = mat.cols;
-        texRes.res.pitch2D.pitchInBytes = mat.step;
-        texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
+        if (cc30)
+        {
+            // Use the texture object
+            cudaResourceDesc texRes;
+            std::memset(&texRes, 0, sizeof(texRes));
+            texRes.resType = cudaResourceTypePitch2D;
+            texRes.res.pitch2D.devPtr = mat.data;
+            texRes.res.pitch2D.height = mat.rows;
+            texRes.res.pitch2D.width = mat.cols;
+            texRes.res.pitch2D.pitchInBytes = mat.step;
+            texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();

-        cudaTextureDesc texDescr;
-        std::memset(&texDescr, 0, sizeof(texDescr));
-        texDescr.addressMode[0] = addressMode;
-        texDescr.addressMode[1] = addressMode;
-        texDescr.addressMode[2] = addressMode;
-        texDescr.filterMode = filterMode;
-        texDescr.readMode = cudaReadModeElementType;
-        texDescr.normalizedCoords = normalizedCoords;
+            cudaTextureDesc texDescr;
+            std::memset(&texDescr, 0, sizeof(texDescr));
+            texDescr.normalizedCoords = normalizedCoords;
+            texDescr.filterMode = filterMode;
+            texDescr.addressMode[0] = addressMode;
+            texDescr.addressMode[1] = addressMode;
+            texDescr.addressMode[2] = addressMode;
+            texDescr.readMode = cudaReadModeElementType;

-        CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
-    }
-
-    __host__ explicit Texture(const GpuMat_<T>& mat,
-                              bool normalizedCoords = false,
-                              cudaTextureFilterMode filterMode = cudaFilterModePoint,
-                              cudaTextureAddressMode addressMode = cudaAddressModeClamp)
-    {
-        CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) );
-
-        rows = mat.rows;
-        cols = mat.cols;
-
-        cudaResourceDesc texRes;
-        std::memset(&texRes, 0, sizeof(texRes));
-        texRes.resType = cudaResourceTypePitch2D;
-        texRes.res.pitch2D.devPtr = mat.data;
-        texRes.res.pitch2D.height = mat.rows;
-        texRes.res.pitch2D.width = mat.cols;
-        texRes.res.pitch2D.pitchInBytes = mat.step;
-        texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
-
-        cudaTextureDesc texDescr;
-        std::memset(&texDescr, 0, sizeof(texDescr));
-        texDescr.addressMode[0] = addressMode;
-        texDescr.addressMode[1] = addressMode;
-        texDescr.addressMode[2] = addressMode;
-        texDescr.filterMode = filterMode;
-        texDescr.readMode = cudaReadModeElementType;
-        texDescr.normalizedCoords = normalizedCoords;
-
-        CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
+            CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
+        }
+        else
+        {
+            // Use the texture reference
+            CvCudevTextureRef<T>::bind(mat, normalizedCoords, filterMode, addressMode);
+        }
    }

    __host__ ~Texture()
    {
-        cudaDestroyTextureObject(this->texObj);
+        if (cc30)
+        {
+            // Use the texture object
+            cudaDestroyTextureObject(this->texObj);
+        }
+        else
+        {
+            // Use the texture reference
+            CvCudevTextureRef<T>::unbind();
+        }
    }
 };

--- a/modules/cudev/include/opencv2/cudev/util/atomic.hpp
+++ b/modules/cudev/include/opencv2/cudev/util/atomic.hpp
@@ -64,11 +64,23 @@ __device__ __forceinline__ uint atomicAdd(uint* address, uint val)

 __device__ __forceinline__ float atomicAdd(float* address, float val)
 {
+#if CV_CUDEV_ARCH >= 200
    return ::atomicAdd(address, val);
+#else
+    int* address_as_i = (int*) address;
+    int old = *address_as_i, assumed;
+    do {
+        assumed = old;
+        old = ::atomicCAS(address_as_i, assumed,
+            __float_as_int(val + __int_as_float(assumed)));
+    } while (assumed != old);
+    return __int_as_float(old);
+#endif
 }

 __device__ static double atomicAdd(double* address, double val)
 {
+#if CV_CUDEV_ARCH >= 130
    unsigned long long int* address_as_ull = (unsigned long long int*) address;
    unsigned long long int old = *address_as_ull, assumed;
    do {
@@ -77,6 +89,11 @@ __device__ static double atomicAdd(double* address, double val)
            __double_as_longlong(val + __longlong_as_double(assumed)));
    } while (assumed != old);
    return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
 }

 // atomicMin
@@ -93,6 +110,7 @@ __device__ __forceinline__ uint atomicMin(uint* address, uint val)

 __device__ static float atomicMin(float* address, float val)
 {
+#if CV_CUDEV_ARCH >= 120
    int* address_as_i = (int*) address;
    int old = *address_as_i, assumed;
    do {
@@ -101,10 +119,16 @@ __device__ static float atomicMin(float* address, float val)
            __float_as_int(::fminf(val, __int_as_float(assumed))));
    } while (assumed != old);
    return __int_as_float(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0f;
+#endif
 }

 __device__ static double atomicMin(double* address, double val)
 {
+#if CV_CUDEV_ARCH >= 130
    unsigned long long int* address_as_ull = (unsigned long long int*) address;
    unsigned long long int old = *address_as_ull, assumed;
    do {
@@ -113,6 +137,11 @@ __device__ static double atomicMin(double* address, double val)
            __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
    } while (assumed != old);
    return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
 }

 // atomicMax
@@ -129,6 +158,7 @@ __device__ __forceinline__ uint atomicMax(uint* address, uint val)

 __device__ static float atomicMax(float* address, float val)
 {
+#if CV_CUDEV_ARCH >= 120
    int* address_as_i = (int*) address;
    int old = *address_as_i, assumed;
    do {
@@ -137,10 +167,16 @@ __device__ static float atomicMax(float* address, float val)
            __float_as_int(::fmaxf(val, __int_as_float(assumed))));
    } while (assumed != old);
    return __int_as_float(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0f;
+#endif
 }

 __device__ static double atomicMax(double* address, double val)
 {
+#if CV_CUDEV_ARCH >= 130
    unsigned long long int* address_as_ull = (unsigned long long int*) address;
    unsigned long long int old = *address_as_ull, assumed;
    do {
@@ -149,6 +185,11 @@ __device__ static double atomicMax(double* address, double val)
            __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
    } while (assumed != old);
    return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
 }

 }}
--- a/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
+++ b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
@@ -228,7 +228,11 @@ template <> __device__ __forceinline__ int saturate_cast<int>(float v)
 }
 template <> __device__ __forceinline__ int saturate_cast<int>(double v)
 {
+#if CV_CUDEV_ARCH >= 130
    return __double2int_rn(v);
+#else
+    return saturate_cast<int>((float) v);
+#endif
 }

 template <> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
@@ -256,7 +260,11 @@ template <> __device__ __forceinline__ uint saturate_cast<uint>(float v)
 }
 template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
 {
+#if CV_CUDEV_ARCH >= 130
    return __double2uint_rn(v);
+#else
+    return saturate_cast<uint>((float) v);
+#endif
 }

 }}
--- a/modules/flann/include/opencv2/flann/any.h
+++ b/modules/flann/include/opencv2/flann/any.h
@@ -257,8 +257,7 @@ public:
    const T& cast() const
    {
        if (policy->type() != typeid(T)) throw anyimpl::bad_any_cast();
-        void* obj = const_cast<void*>(object);
-        T* r = reinterpret_cast<T*>(policy->get_value(&obj));
+        T* r = reinterpret_cast<T*>(policy->get_value(const_cast<void **>(&object)));
        return *r;
    }

--- a/modules/gpuwarping/src/cuda/resize.cu
+++ b/modules/gpuwarping/src/cuda/resize.cu
@@ -194,7 +194,7 @@ namespace cv { namespace gpu { namespace cudev
    }

    template <typename T>
-    void call_resize_nearest_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
+    void call_resize_nearest_tex(const PtrStepSz<T>& /*src*/, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -301,7 +301,7 @@ namespace cv { namespace gpu { namespace cudev

    template <typename T> struct ResizeNearestDispatcher
    {
-        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
+        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
        {
            call_resize_nearest_glob(src, dst, fy, fx, stream);
        }
--- a/modules/imgproc/doc/feature_detection.rst
+++ b/modules/imgproc/doc/feature_detection.rst
@@ -496,6 +496,110 @@ And this is the output of the above program in case of the probabilistic Hough t

 .. image:: pics/houghp.png

+.. seealso::
+
+    :ocv:class:`LineSegmentDetector`
+
+
+
+LineSegmentDetector
+-------------------
+Line segment detector class, following the algorithm described at [Rafael12]_.
+
+.. ocv:class:: LineSegmentDetector : public Algorithm
+
+
+createLineSegmentDetectorPtr
+----------------------------
+Creates a smart pointer to a LineSegmentDetector object and initializes it.
+
+.. ocv:function:: Ptr<LineSegmentDetector> createLineSegmentDetectorPtr(int _refine = LSD_REFINE_STD, double _scale = 0.8, double _sigma_scale = 0.6, double _quant = 2.0, double _ang_th = 22.5, double _log_eps = 0, double _density_th = 0.7, int _n_bins = 1024)
+
+    :param _refine: The way found lines will be refined:
+
+        * **LSD_REFINE_NONE** - No refinement applied.
+
+        * **LSD_REFINE_STD**  - Standard refinement is applied. E.g. breaking arches into smaller straighter line approximations.
+
+        * **LSD_REFINE_ADV**  - Advanced refinement. Number of false alarms is calculated, lines are refined through increase of precision, decrement in size, etc.
+
+    :param scale: The scale of the image that will be used to find the lines. Range (0..1].
+
+    :param sigma_scale: Sigma for Gaussian filter. It is computed as sigma = _sigma_scale/_scale.
+
+    :param quant: Bound to the quantization error on the gradient norm.
+
+    :param ang_th: Gradient angle tolerance in degrees.
+
+    :param log_eps: Detection threshold: -log10(NFA) > log_eps. Used only when advanced refinement is chosen.
+
+    :param density_th: Minimal density of aligned region points in the enclosing rectangle.
+
+    :param n_bins: Number of bins in pseudo-ordering of gradient modulus.
+
+The LineSegmentDetector algorithm is defined using the standard values. Only advanced users may want to edit those, so as to tailor it to their own application.
+
+
+LineSegmentDetector::detect
+---------------------------
+Finds lines in the input image. See the lsd_lines.cpp sample for possible usage.
+
+.. ocv:function:: void LineSegmentDetector::detect(const InputArray _image, OutputArray _lines, OutputArray width = noArray(), OutputArray prec = noArray(), OutputArray nfa = noArray())
+
+    :param _image: A grayscale (CV_8UC1) input image.
+        If only a roi needs to be selected, use ::
+        lsd_ptr->detect(image(roi), lines, ...);
+        lines += Scalar(roi.x, roi.y, roi.x, roi.y);
+
+    :param lines: A vector of Vec4i elements specifying the beginning and ending point of a line. Where Vec4i is (x1, y1, x2, y2), point 1 is the start, point 2 - end. Returned lines are strictly oriented depending on the gradient.
+
+    :param width: Vector of widths of the regions, where the lines are found. E.g. Width of line.
+
+    :param prec: Vector of precisions with which the lines are found.
+
+    :param nfa: Vector containing number of false alarms in the line region, with precision of 10%. The bigger the value, logarithmically better the detection.
+
+        * -1 corresponds to 10 mean false alarms
+
+        * 0 corresponds to 1 mean false alarm
+
+        * 1 corresponds to 0.1 mean false alarms
+
+    This vector will be calculated only when the object's type is LSD_REFINE_ADV.
+
+This is the output of the default parameters of the algorithm on the above shown image.
+
+.. image:: pics/building_lsd.png
+
+.. note::
+
+   * An example using the LineSegmentDetector can be found at opencv_source_code/samples/cpp/lsd_lines.cpp
+
+LineSegmentDetector::drawSegments
+---------------------------------
+Draws the line segments on a given image.
+
+.. ocv:function:: void LineSegmentDetector::drawSegments(InputOutputArray _image, InputArray lines)
+
+    :param image: The image where the lines will be drawn. Should be bigger than or equal in size to the image where the lines were found.
+
+    :param lines: A vector of the lines that need to be drawn.
+
+
+LineSegmentDetector::compareSegments
+------------------------------------
+Draws two groups of lines in blue and red, counting the non overlapping (mismatching) pixels.
+
+.. ocv:function:: int LineSegmentDetector::compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image = noArray())
+
+    :param size: The size of the image, where lines1 and lines2 were found.
+
+    :param lines1: The first group of lines that needs to be drawn. It is visualized in blue color.
+
+    :param lines2: The second group of lines. They are visualized in red color.
+
+    :param image: Optional image, where the lines will be drawn. The image should be color in order for lines1 and lines2 to be drawn in the above mentioned colors.
+


 preCornerDetect
@@ -542,3 +646,5 @@ The corners can be found as local maximums of the functions, as shown below: ::
 .. [Shi94] J. Shi and C. Tomasi. *Good Features to Track*. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 593-600, June 1994.

 .. [Yuen90] Yuen, H. K. and Princen, J. and Illingworth, J. and Kittler, J., *Comparative study of Hough transform methods for circle finding*. Image Vision Comput. 8 1, pp 71–77 (1990)
+
+.. [Rafael12] Rafael Grompone von Gioi, Jérémie Jakubowicz, Jean-Michel Morel, and Gregory Randall, LSD: a Line Segment Detector, Image Processing On Line, vol. 2012. http://dx.doi.org/10.5201/ipol.2012.gjmr-lsd
--- a/modules/imgproc/doc/filtering.rst
+++ b/modules/imgproc/doc/filtering.rst
@@ -412,6 +412,28 @@ http://www.dai.ed.ac.uk/CVonline/LOCAL\_COPIES/MANDUCHI1/Bilateral\_Filtering.ht
 This filter does not work inplace.


+adaptiveBilateralFilter
+-----------------------
+Applies the adaptive bilateral filter to an image.
+
+.. ocv:function:: void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, double sigmaSpace, Point anchor=Point(-1, -1), int borderType=BORDER_DEFAULT )
+
+.. ocv:pyfunction:: cv2.adaptiveBilateralFilter(src, ksize, sigmaSpace[, dst[, anchor[, borderType]]]) -> dst
+
+    :param src: Source 8-bit, 1-channel or 3-channel image.
+
+    :param dst: Destination image of the same size and type as  ``src`` .
+
+    :param ksize: filter kernel size.
+
+    :param sigmaSpace: Filter sigma in the coordinate space. It has a similar meaning to ``sigmaSpace`` in ``bilateralFilter``.
+
+    :param anchor: anchor point; default value ``Point(-1,-1)`` means that the anchor is at the kernel center. Only default value is supported now.
+
+    :param borderType: border mode used to extrapolate pixels outside of the image.
+
+The function applies adaptive bilateral filtering to the input image. This filter is similar to ``bilateralFilter``, in that dissimilarity from and distance to the center pixel are penalized. Instead of using ``sigmaColor``, we employ the variance of pixel values in the neighbourhood.
+


 blur
--- a/modules/imgproc/doc/pics/building_lsd.png
+++ b/modules/imgproc/doc/pics/building_lsd.png
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -908,7 +908,7 @@ class LineSegmentDetector : public Algorithm
 {
 public:
 /**
- * Detect lines in the input image with the specified ROI.
+ * Detect lines in the input image.
 *
 * @param _image    A grayscale(CV_8UC1) input image.
 *                  If only a roi needs to be selected, use
@@ -917,8 +917,6 @@ public:
 * @param _lines    Return: A vector of Vec4i elements specifying the beginning and ending point of a line.
 *                          Where Vec4i is (x1, y1, x2, y2), point 1 is the start, point 2 - end.
 *                          Returned lines are strictly oriented depending on the gradient.
- * @param _roi      Return: ROI of the image, where lines are to be found. If specified, the returning
- *                          lines coordinates are image wise.
 * @param width     Return: Vector of widths of the regions, where the lines are found. E.g. Width of line.
 * @param prec      Return: Vector of precisions with which the lines are found.
 * @param nfa       Return: Vector containing number of false alarms in the line region, with precision of 10%.
@@ -939,18 +937,19 @@ public:
 *                  Should have the size of the image, where the lines were found
 * @param lines     The lines that need to be drawn
 */
-    virtual void drawSegments(InputOutputArray image, InputArray lines) = 0;
+    virtual void drawSegments(InputOutputArray _image, InputArray lines) = 0;

 /**
 * Draw both vectors on the image canvas. Uses blue for lines 1 and red for lines 2.
 *
- * @param image     The image, where lines will be drawn.
- *                  Should have the size of the image, where the lines were found
+ * @param size      The size of the image, where lines were found.
 * @param lines1    The first lines that need to be drawn. Color - Blue.
 * @param lines2    The second lines that need to be drawn. Color - Red.
+ * @param image     Optional image, where lines will be drawn.
+ *                  Should have the size of the image, where the lines were found
 * @return          The number of mismatching pixels between lines1 and lines2.
 */
-    virtual int compareSegments(const Size& size, InputArray lines1, InputArray lines2, Mat* image = 0) = 0;
+    virtual int compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image = noArray()) = 0;

    virtual ~LineSegmentDetector() {};
 };
@@ -1065,6 +1064,11 @@ CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
                                   double sigmaColor, double sigmaSpace,
                                   int borderType = BORDER_DEFAULT );

+//! smooths the image using adaptive bilateral filter.
+//! dst has the same size and type as src; ksize is the filter kernel size and
+//! sigmaSpace is the filter sigma in the coordinate space. anchor = Point(-1,-1)
+//! places the anchor at the kernel center; borderType selects how pixels outside
+//! the image are extrapolated.
+CV_EXPORTS_W void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize,
+                                           double sigmaSpace, Point anchor=Point(-1, -1),
+                                           int borderType=BORDER_DEFAULT );
+
 //! smooths the image using the box filter. Each pixel is processed in O(1) time
 CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
                             Size ksize, Point anchor = Point(-1,-1),
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -254,19 +254,19 @@ bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
    return ok;
 }

-IppStatus __stdcall ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
 {
    return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
 }

-IppStatus __stdcall ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
 {
    return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
 }

-IppStatus __stdcall ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
+static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
 {
    return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -50,9 +50,73 @@
 #include <iostream>
 #include <vector>

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+static IppStatus sts = ippInit();
+#endif
+
 namespace cv
 {

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
+    typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
+    typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
+    typedef IppStatus (CV_STDCALL* ippiResizeSqrPixelFunc)(const void*, IppiSize, int, IppiRect, void*, int, IppiRect, double, double, double, double, int, Ipp8u *);
+
+    // Converts the first `channels` components of `value` to `Type` and hands
+    // them, together with the destination pointer, step and size, to `func`.
+    // Returns true when `func` reports a non-negative status.
+    template <int channels, typename Type>
+    bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
+    {
+        Type perChannel[channels];
+        int ch = 0;
+        while( ch < channels )
+        {
+            perChannel[ch] = (Type)value[ch];
+            ++ch;
+        }
+        const IppStatus status = func(perChannel, dataPointer, step, size);
+        return status >= 0;
+    }
+
+    // Fills the region described by `dataPointer`, `step` and `size` with `value`
+    // using the ippiSet_* routine selected by channel count and depth.
+    // Handles 1, 3 and 4 channels of CV_8U, CV_16U and CV_32F. Any other
+    // combination returns false without calling IPP; a negative IPP status
+    // also yields false.
+    bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
+    {
+        switch( channels )
+        {
+        case 1:
+            // Single channel: the scalar component is passed by value.
+            if( depth == CV_8U )
+                return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0;
+            if( depth == CV_16U )
+                return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0;
+            if( depth == CV_32F )
+                return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0;
+            break;
+        case 3:
+            // Multi-channel: the components go through IPPSetSimple as an array.
+            if( depth == CV_8U )
+                return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
+            if( depth == CV_16U )
+                return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
+            if( depth == CV_32F )
+                return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
+            break;
+        case 4:
+            if( depth == CV_8U )
+                return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
+            if( depth == CV_16U )
+                return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
+            if( depth == CV_32F )
+                return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
+            break;
+        default:
+            break;
+        }
+        return false;
+    }
+#endif
+
 /************** interpolation formulas and tables ***************/

 const int INTER_RESIZE_COEF_BITS=11;
@@ -1795,6 +1859,45 @@ static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, Dec
    return k;
 }

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// Parallel loop body that resizes one horizontal stripe of `src` into `dst`
+// through the IPP resize routine passed as `func`. `*ok` is set to true on
+// construction and cleared by any stripe that fails, so the caller can fall
+// back to another implementation.
+class IPPresizeInvoker :
+    public ParallelLoopBody
+{
+public:
+    IPPresizeInvoker(Mat &_src, Mat &_dst, double &_inv_scale_x, double &_inv_scale_y, int _mode, ippiResizeSqrPixelFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      // `range` selects source rows [range.start, range.end); the matching
+      // destination rows are obtained by scaling with inv_scale_y and
+      // clamping to dst.rows.
+      virtual void operator() (const Range& range) const
+      {
+          int cn = src.channels();
+          IppiRect srcroi = { 0, range.start, src.cols, range.end - range.start };
+          int dsty = CV_IMIN(cvRound(range.start * inv_scale_y), dst.rows);
+          int dstwidth = CV_IMIN(cvRound(src.cols * inv_scale_x), dst.cols);
+          int dstheight = CV_IMIN(cvRound(range.end * inv_scale_y), dst.rows);
+          IppiRect dstroi = { 0, dsty, dstwidth, dstheight - dsty };
+          int bufsize = 0;
+          // Without a valid size from IPP the work buffer cannot be allocated safely.
+          if( ippiResizeGetBufSize( srcroi, dstroi, cn, mode, &bufsize ) < 0 )
+          {
+              *ok = false;
+              return;
+          }
+          Ipp8u *buf = ippsMalloc_8u( bufsize );
+          if( bufsize > 0 && !buf )
+          {
+              // Allocation failed: report it instead of handing IPP a null buffer.
+              *ok = false;
+              return;
+          }
+          if( func( src.data, ippiSize(src.cols, src.rows), (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, inv_scale_x, inv_scale_y, 0, 0, mode, buf ) < 0 )
+              *ok = false;
+          ippsFree(buf);
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double inv_scale_x;
+    double inv_scale_y;
+    int mode;
+    ippiResizeSqrPixelFunc func;
+    bool *ok;
+    // Declared but not defined: assignment is disabled.
+    const IPPresizeInvoker& operator= (const IPPresizeInvoker&);
+};
+#endif

 }

@@ -1937,6 +2040,34 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
    double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
    int k, sx, sy, dx, dy;

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int mode = interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : 0;
+    int type = src.type();
+    ippiResizeSqrPixelFunc ippFunc =
+        type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R :
+        type == CV_8UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C3R :
+        type == CV_8UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C4R :
+        type == CV_16UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C1R :
+        type == CV_16UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C3R :
+        type == CV_16UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C4R :
+        type == CV_16SC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C1R :
+        type == CV_16SC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C3R :
+        type == CV_16SC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C4R :
+        type == CV_32FC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C1R :
+        type == CV_32FC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C3R :
+        type == CV_32FC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R :
+        0;
+    if( ippFunc && mode != 0 )
+    {
+        bool ok;
+        Range range(0, src.rows);
+        IPPresizeInvoker invoker(src, dst, inv_scale_x, inv_scale_y, mode, ippFunc, &ok);
+        parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+        if( ok )
+            return;
+    }
+#endif
+
    if( interpolation == INTER_NEAREST )
    {
        resizeNN( src, dst, inv_scale_x, inv_scale_y );
@@ -3446,6 +3577,49 @@ private:
    double *M;
 };

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// Parallel loop body that warps `src` into one stripe of destination rows
+// through the IPP affine-warp routine passed as `func`. `*ok` is set to true
+// on construction and cleared by any stripe that fails, so the caller can fall
+// back to another implementation.
+class IPPwarpAffineInvoker :
+    public ParallelLoopBody
+{
+public:
+    // Initializers are listed in member declaration order, which is the order
+    // they actually run in; this keeps -Wreorder quiet.
+    IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), coeffs(_coeffs), mode(_interpolation), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      // `range` selects destination rows [range.start, range.end); the whole
+      // source image is always given to IPP as the source ROI.
+      virtual void operator() (const Range& range) const
+      {
+          IppiSize srcsize = { src.cols, src.rows };
+          IppiRect srcroi = { 0, 0, src.cols, src.rows };
+          IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
+          int cnn = src.channels();
+          if( borderType == BORDER_CONSTANT )
+          {
+              // Pre-fill this stripe with the border value before warping into it.
+              IppiSize setSize = { dst.cols, range.end - range.start };
+              void *dataPointer = dst.data + dst.step[0] * range.start;
+              if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
+              {
+                  *ok = false;
+                  return;
+              }
+          }
+          // Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
+          if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0)
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double (&coeffs)[2][3];
+    int mode;
+    int borderType;
+    Scalar borderValue;
+    ippiWarpAffineBackFunc func;
+    bool *ok;
+    // Declared but not defined: assignment is disabled.
+    const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
+};
+#endif
+
 }


@@ -3492,6 +3666,50 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
    const int AB_BITS = MAX(10, (int)INTER_BITS);
    const int AB_SCALE = 1 << AB_BITS;

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int depth = src.depth();
+    int channels = src.channels();
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
+        ( channels == 1 || channels == 3 || channels == 4 ) &&
+        ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
+    {
+        int type = src.type();
+        ippiWarpAffineBackFunc ippFunc =
+            type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
+            0;
+        int mode =
+            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            flags == INTER_NEAREST ? IPPI_INTER_NN :
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
+            0;
+        if( mode && ippFunc )
+        {
+            double coeffs[2][3];
+            for( int i = 0; i < 2; i++ )
+            {
+                for( int j = 0; j < 3; j++ )
+                {
+                    coeffs[i][j] = matM.at<double>(i, j);
+                }
+            }
+            bool ok;
+            Range range(0, dst.rows);
+            IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+            if( ok )
+                return;
+        }
+    }
+#endif
+
    for( x = 0; x < dst.cols; x++ )
    {
        adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@@ -3599,6 +3817,50 @@ private:
    Scalar borderValue;
 };

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// Parallel loop body that warps `src` into one stripe of destination rows
+// through the IPP perspective-warp routine passed as `func`. `*ok` is set to
+// true on construction and cleared by any stripe that fails, so the caller can
+// fall back to another implementation.
+class IPPwarpPerspectiveInvoker :
+    public ParallelLoopBody
+{
+public:
+    // Initializers are listed in member declaration order, which is the order
+    // they actually run in; this keeps -Wreorder quiet.
+    IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), coeffs(_coeffs), mode(_interpolation), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      // `range` selects destination rows [range.start, range.end); the whole
+      // source image is always given to IPP as the source ROI.
+      virtual void operator() (const Range& range) const
+      {
+          IppiSize srcsize = {src.cols, src.rows};
+          IppiRect srcroi = {0, 0, src.cols, src.rows};
+          IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
+          int cnn = src.channels();
+
+          if( borderType == BORDER_CONSTANT )
+          {
+              // Pre-fill this stripe with the border value before warping into it.
+              IppiSize setSize = {dst.cols, range.end - range.start};
+              void *dataPointer = dst.data + dst.step[0] * range.start;
+              if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
+              {
+                  *ok = false;
+                  return;
+              }
+          }
+          if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double (&coeffs)[3][3];
+    int mode;
+    int borderType;
+    const Scalar borderValue;
+    ippiWarpPerspectiveBackFunc func;
+    bool *ok;
+    // Declared but not defined: assignment is disabled.
+    const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
+};
+#endif
+
 }

 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
@@ -3629,6 +3891,50 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
    if( !(flags & WARP_INVERSE_MAP) )
         invert(matM, matM);

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    int depth = src.depth();
+    int channels = src.channels();
+    if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
+        ( channels == 1 || channels == 3 || channels == 4 ) &&
+        ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
+    {
+        int type = src.type();
+        ippiWarpPerspectiveBackFunc ippFunc =
+            type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
+            type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
+            type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
+            type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
+            type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
+            type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
+            type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
+            type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
+            type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
+            0;
+        int mode =
+            flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
+            flags == INTER_NEAREST ? IPPI_INTER_NN :
+            flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
+            0;
+        if( mode && ippFunc )
+        {
+            double coeffs[3][3];
+            for( int i = 0; i < 3; i++ )
+            {
+                for( int j = 0; j < 3; j++ )
+                {
+                    coeffs[i][j] = matM.at<double>(i, j);
+                }
+            }
+            bool ok;
+            Range range(0, dst.rows);
+            IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
+            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
+            if( ok )
+                return;
+        }
+    }
+#endif
+
    Range range(0, dst.rows);
    warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
    parallel_for_(range, invoker, dst.total()/(double)(1<<16));
--- a/modules/imgproc/src/lsd.cpp
+++ b/modules/imgproc/src/lsd.cpp
@@ -1,5 +1,6 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
@@ -9,8 +10,7 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -185,7 +185,7 @@ public:
        double _log_eps = 0, double _density_th = 0.7, int _n_bins = 1024);

 /**
- * Detect lines in the input image with the specified ROI.
+ * Detect lines in the input image.
 *
 * @param _image    A grayscale(CV_8UC1) input image.
 *                  If only a roi needs to be selected, use
@@ -194,8 +194,6 @@ public:
 * @param _lines    Return: A vector of Vec4i elements specifying the beginning and ending point of a line.
 *                          Where Vec4i is (x1, y1, x2, y2), point 1 is the start, point 2 - end.
 *                          Returned lines are strictly oriented depending on the gradient.
- * @param _roi      Return: ROI of the image, where lines are to be found. If specified, the returning
- *                          lines coordinates are image wise.
 * @param width     Return: Vector of widths of the regions, where the lines are found. E.g. Width of line.
 * @param prec      Return: Vector of precisions with which the lines are found.
 * @param nfa       Return: Vector containing number of false alarms in the line region, with precision of 10%.
@@ -216,18 +214,19 @@ public:
 *                  Should have the size of the image, where the lines were found
 * @param lines     The lines that need to be drawn
 */
-    void drawSegments(InputOutputArray image, InputArray lines);
+    void drawSegments(InputOutputArray _image, InputArray lines);

 /**
 * Draw both vectors on the image canvas. Uses blue for lines 1 and red for lines 2.
 *
- * @param image     The image, where lines will be drawn.
- *                  Should have the size of the image, where the lines were found
+ * @param size      The size of the image, where lines1 and lines2 were found.
 * @param lines1    The first lines that need to be drawn. Color - Blue.
 * @param lines2    The second lines that need to be drawn. Color - Red.
+ * @param image     An optional image, where lines will be drawn.
+ *                  Should have the size of the image, where the lines were found
 * @return          The number of mismatching pixels between lines1 and lines2.
 */
-    int compareSegments(const Size& size, InputArray lines1, InputArray lines2, Mat* image = 0);
+    int compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image = noArray());

 private:
    Mat image;
@@ -336,7 +335,7 @@ private:
 * @param rec       Return: The generated rectangle.
 */
    void region2rect(const std::vector<RegionPoint>& reg, const int reg_size, const double reg_angle,
-                    const double prec, const double p, rect& rec) const;
+                     const double prec, const double p, rect& rec) const;

 /**
 * Compute region's angle as the principal inertia axis of the region.
@@ -410,7 +409,7 @@ LineSegmentDetectorImpl::LineSegmentDetectorImpl(int _refine, double _scale, dou
              _n_bins > 0);
 }

-void LineSegmentDetectorImpl::detect(const InputArray _image, OutputArray _lines,
+void LineSegmentDetectorImpl::detect(InputArray _image, OutputArray _lines,
                OutputArray _width, OutputArray _prec, OutputArray _nfa)
 {
    Mat_<double> img = _image.getMat();
@@ -1150,7 +1149,7 @@ inline bool LineSegmentDetectorImpl::isAligned(const int& address, const double&
 }


-void LineSegmentDetectorImpl::drawSegments(InputOutputArray _image, const InputArray lines)
+void LineSegmentDetectorImpl::drawSegments(InputOutputArray _image, InputArray lines)
 {
    CV_Assert(!_image.empty() && (_image.channels() == 1 || _image.channels() == 3));

@@ -1186,10 +1185,10 @@ void LineSegmentDetectorImpl::drawSegments(InputOutputArray _image, const InputA
 }


-int LineSegmentDetectorImpl::compareSegments(const Size& size, const InputArray lines1, const InputArray lines2, Mat* _image)
+int LineSegmentDetectorImpl::compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray _image)
 {
    Size sz = size;
-    if (_image && _image->size() != size) sz = _image->size();
+    if (_image.needed() && _image.size() != size) sz = _image.size();
    CV_Assert(sz.area());

    Mat_<uchar> I1 = Mat_<uchar>::zeros(sz);
@@ -1219,14 +1218,11 @@ int LineSegmentDetectorImpl::compareSegments(const Size& size, const InputArray
    bitwise_xor(I1, I2, Ixor);
    int N = countNonZero(Ixor);

-    if (_image)
+    if (_image.needed())
    {
-        Mat Ig;
-        if (_image->channels() == 1)
-        {
-            cvtColor(*_image, *_image, CV_GRAY2BGR);
-        }
-        CV_Assert(_image->isContinuous() && I1.isContinuous() && I2.isContinuous());
+        CV_Assert(_image.channels() == 3);
+        Mat img = _image.getMatRef();
+        CV_Assert(img.isContinuous() && I1.isContinuous() && I2.isContinuous());

        for (unsigned int i = 0; i < I1.total(); ++i)
        {
@@ -1234,11 +1230,12 @@ int LineSegmentDetectorImpl::compareSegments(const Size& size, const InputArray
            uchar i2 = I2.data[i];
            if (i1 || i2)
            {
-                _image->data[3*i + 1] = 0;
-                if (i1) _image->data[3*i] = 255;
-                else _image->data[3*i] = 0;
-                if (i2) _image->data[3*i + 2] = 255;
-                else _image->data[3*i + 2] = 0;
+                unsigned int base_idx = i * 3;
+                if (i1) img.data[base_idx] = 255;
+                else img.data[base_idx] = 0;
+                img.data[base_idx + 1] = 0;
+                if (i2) img.data[base_idx + 2] = 255;
+                else img.data[base_idx + 2] = 0;
            }
        }
    }
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@@ -1213,11 +1213,10 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
 }

 static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
-    InputArray _kernel,
-    const Point &anchor, int iterations,
+    const Mat& _kernel, Point anchor, int iterations,
    int borderType, const Scalar &borderValue)
 {
-    Mat src = _src.getMat(), kernel = _kernel.getMat();
+    Mat src = _src.getMat(), kernel = _kernel;
    if( !( src.depth() == CV_8U || src.depth() == CV_32F ) || ( iterations > 1 ) ||
        !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) )
        || !( op == MORPH_DILATE || op == MORPH_ERODE) )
@@ -1248,9 +1247,6 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,

    }
    Size ksize = kernel.data ? kernel.size() : Size(3,3);
-    Point normanchor = normalizeAnchor(anchor, ksize);
-
-    CV_Assert( normanchor.inside(Rect(0, 0, ksize.width, ksize.height)) );

    _dst.create( src.size(), src.type() );
    Mat dst = _dst.getMat();
@@ -1265,7 +1261,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
    if( !kernel.data )
    {
        ksize = Size(1+iterations*2,1+iterations*2);
-        normanchor = Point(iterations, iterations);
+        anchor = Point(iterations, iterations);
        rectKernel = true;
        iterations = 1;
    }
@@ -1273,7 +1269,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
    {
        ksize = Size(ksize.width + (iterations-1)*(ksize.width-1),
             ksize.height + (iterations-1)*(ksize.height-1)),
-        normanchor = Point(normanchor.x*iterations, normanchor.y*iterations);
+        anchor = Point(anchor.x*iterations, anchor.y*iterations);
        kernel = Mat();
        rectKernel = true;
        iterations = 1;
@@ -1283,7 +1279,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
    if( iterations > 1 )
        return false;

-    return IPPMorphReplicate( op, src, dst, kernel, ksize, normanchor, rectKernel );
+    return IPPMorphReplicate( op, src, dst, kernel, ksize, anchor, rectKernel );
 }
 #endif

@@ -1292,18 +1288,19 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
                     Point anchor, int iterations,
                     int borderType, const Scalar& borderValue )
 {
-
-#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-    if( IPPMorphOp(op, _src, _dst, _kernel, anchor, iterations, borderType, borderValue) )
-        return;
-#endif
-
-    Mat src = _src.getMat(), kernel = _kernel.getMat();
+    Mat kernel = _kernel.getMat();
    Size ksize = kernel.data ? kernel.size() : Size(3,3);
    anchor = normalizeAnchor(anchor, ksize);

    CV_Assert( anchor.inside(Rect(0, 0, ksize.width, ksize.height)) );

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) )
+        return;
+#endif
+
+    Mat src = _src.getMat();
+
    _dst.create( src.size(), src.type() );
    Mat dst = _dst.getMat();

--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -1879,6 +1879,41 @@ private:
    float *space_weight, *color_weight;
 };

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// Parallel loop body that runs the IPP single-channel 8-bit bilateral filter
+// on one stripe of destination rows. `*ok` is set to true on construction and
+// cleared by any stripe that fails, so the caller can fall back to another
+// implementation.
+class IPPBilateralFilter_8u_Invoker :
+    public ParallelLoopBody
+{
+public:
+    IPPBilateralFilter_8u_Invoker(Mat &_src, Mat &_dst, double _sigma_color, double _sigma_space, int _radius, bool *_ok) :
+      ParallelLoopBody(), src(_src), dst(_dst), sigma_color(_sigma_color), sigma_space(_sigma_space), radius(_radius), ok(_ok)
+      {
+          *ok = true;
+      }
+
+      // `range` selects destination rows [range.start, range.end).
+      virtual void operator() (const Range& range) const
+      {
+          int d = radius * 2 + 1;
+          IppiSize kernel = {d, d};
+          IppiSize roi={dst.cols, range.end - range.start};
+          int bufsize=0;
+          if( ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize) < 0 )
+          {
+              *ok = false;
+              return;
+          }
+          // alignPtr rounds the address up to a multiple of 32 and may skip up
+          // to 31 bytes, so over-allocate to keep bufsize usable bytes after it.
+          AutoBuffer<uchar> buf(bufsize + 32);
+          IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
+          if( ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec ) < 0 )
+          {
+              *ok = false;
+              return;
+          }
+          // The source pointer is advanced by `radius` rows and `radius` bytes.
+          // NOTE(review): presumably `src` carries a `radius`-pixel border
+          // around the area covered by `dst`; confirm against the caller.
+          if( ippiFilterBilateral_8u_C1R( src.ptr<uchar>(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr<uchar>(range.start), (int)dst.step[0], roi, kernel, pSpec ) < 0)
+              *ok = false;
+      }
+private:
+    Mat &src;
+    Mat &dst;
+    double sigma_color;
+    double sigma_space;
+    int radius;
+    bool *ok;
+    // Declared but not defined: assignment is disabled.
+    const IPPBilateralFilter_8u_Invoker& operator= (const IPPBilateralFilter_8u_Invoker&);
+};
+#endif
+
 static void
 bilateralFilter_8u( const Mat& src, Mat& dst, int d,
    double sigma_color, double sigma_space,
@@ -1908,32 +1943,19 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
    radius = MAX(radius, 1);
    d = radius*2 + 1;

-#if 0 && defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
-    if(cn == 1)
-    {
-        IppiSize kernel = {d, d};
-        IppiSize roi={src.cols, src.rows};
-        int bufsize=0;
-        ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize);
-        AutoBuffer<uchar> buf(bufsize+128);
-        IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
-        ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, sigma_color*sigma_color, sigma_space*sigma_space, 1, pSpec );
-        Mat tsrc;
-        const Mat* psrc = &src;
-        if( src.data == dst.data )
-        {
-            src.copyTo(tsrc);
-            psrc = &tsrc;
-        }
-        if( ippiFilterBilateral_8u_C1R(psrc->data, (int)psrc->step[0],
-                                       dst.data, (int)dst.step[0],
-                                       roi, kernel, pSpec) >= 0 )
-            return;
-    }
-#endif
    Mat temp;
    copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );

+#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+    if( cn == 1 )
+    {
+        bool ok;
+        IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok );
+        parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16));
+        if( ok ) return;
+    }
+#endif
+
    std::vector<float> _color_weight(cn*256);
    std::vector<float> _space_weight(d*d);
    std::vector<int> _space_ofs(d*d);
@@ -2258,6 +2280,236 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d,
        "Bilateral filtering is only implemented for 8u and 32f images" );
 }

+
+/****************************************************************************************\
+                                  Adaptive Bilateral Filtering
+\****************************************************************************************/
+
+namespace cv
+{
+// Compile-time switches for the adaptive bilateral filter:
+//   CALCVAR      - 1: estimate the colour variance from each pixel's window,
+//                  0: use the fixed value 900 instead.
+//   FIXED_WEIGHT - 1: give every window pixel the weight 1,
+//                  0: use the variance- and distance-dependent weights.
+#define CALCVAR 1
+#define FIXED_WEIGHT 0
+
+// Parallel loop body of the 8-bit adaptive bilateral filter (1 or 3 channels).
+//
+// `_temp` must be the source image padded so that the ksize.width x
+// ksize.height window belonging to destination pixel (row, col) starts at
+// (row, col) in `_temp`; the pixel itself then sits at
+// (row + anchor.y, col + anchor.x). adaptiveBilateralFilter_8u() below builds
+// exactly that padding.
+//
+// Every output sample is a weighted mean of its window, where
+//   weight = var / (var + diff^2) * space_weight
+// with `diff` the difference to the anchor pixel and `var` a per-channel
+// variance estimate taken from the window.
+class adaptiveBilateralFilter_8u_Invoker :
+    public ParallelLoopBody
+{
+public:
+    // _dest        - output image; one row is written per loop iteration
+    // _temp        - padded copy of the source (see class comment)
+    // _ksize       - window size, both dimensions must be odd
+    // _sigma_space - spatial sigma; values <= 0 are replaced by 1
+    // _anchor      - position of the filtered pixel inside the window
+    adaptiveBilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, Size _ksize, double _sigma_space, Point _anchor) :
+        temp(&_temp), dest(&_dest), ksize(_ksize), sigma_space(_sigma_space), anchor(_anchor)
+    {
+        if( sigma_space <= 0 )
+            sigma_space = 1;
+        CV_Assert((ksize.width & 1) && (ksize.height & 1));
+        // the window indexing below relies on the anchor lying inside the kernel
+        CV_Assert(0 <= anchor.x && anchor.x < ksize.width &&
+                  0 <= anchor.y && anchor.y < ksize.height);
+
+        // Spatial weights, row-major with ksize.width entries per row, measured
+        // from the anchor. For a centred anchor this is the same table the
+        // kernel-centre based formula produces.
+        space_weight.resize(ksize.width * ksize.height);
+        const double sigma2 = sigma_space * sigma_space;
+        int idx = 0;
+        for(int r = 0; r < ksize.height; r++)
+            for(int c = 0; c < ksize.width; c++)
+            {
+                const int dy = r - anchor.y;
+                const int dx = c - anchor.x;
+                space_weight[idx++] = (float)(sigma2 / (sigma2 + dx * dx + dy * dy));
+            }
+    }
+
+    // Filters destination rows [range.start, range.end).
+    virtual void operator()(const Range& range) const
+    {
+        const int cn = dest->channels();
+        assert(cn == 1 || cn == 3);
+
+        const int kw = ksize.width;
+        const int kh = ksize.height;
+        const int rowLen = dest->cols * cn;
+#if CALCVAR
+        const int howManyAll = kw * kh;
+        // N^2 is formed in double so that large kernels cannot overflow int
+        const float normSq = (float)((double)howManyAll * howManyAll);
+        // Lower bound for the variance. A window whose summed pixels are all
+        // zero yields var == 0, which made the weight 0/0 = NaN and the
+        // NaN -> uchar conversion undefined.
+        const float minVar = 1e-6f;
+#endif
+
+        for(int i = range.start; i < range.end; i++)
+        {
+            uchar* drow = dest->ptr<uchar>(i);
+
+            for(int j = 0; j < rowLen; j += cn)
+            {
+                float var[3];
+#if CALCVAR
+                int sumVal[3]    = {0, 0, 0};
+                int sumValSqr[3] = {0, 0, 0};
+
+                // NOTE(review): as in the original code, the last window row is
+                // left out of the sums while the normalisation uses the full
+                // window size - confirm this is intended.
+                for(int r = 0; r < kh - 1; r++)
+                {
+                    const uchar* tptr = temp->ptr(i + r) + j;
+                    for(int c = 0; c < kw; c++, tptr += cn)
+                        for(int k = 0; k < cn; k++)
+                        {
+                            const int currVal = tptr[k];
+                            sumVal[k]    += currVal;
+                            sumValSqr[k] += currVal * currVal;
+                        }
+                }
+                for(int k = 0; k < cn; k++)
+                {
+                    // numerator in double: the int product overflowed for big kernels
+                    const double num = (double)sumValSqr[k] * howManyAll
+                                     - (double)sumVal[k] * sumVal[k];
+                    var[k] = (float)num / normSq;
+                    if( !(var[k] > minVar) )
+                        var[k] = minVar;
+                }
+#else
+                var[0] = var[1] = var[2] = 900.0f;
+#endif
+
+#if !FIXED_WEIGHT
+                // the pixel being filtered
+                const uchar* centre = temp->ptr(i + anchor.y) + j + cn * anchor.x;
+#endif
+                float tmpSum[3]      = {0.f, 0.f, 0.f};
+                float totalWeight[3] = {0.f, 0.f, 0.f};
+
+                for(int r = 0; r < kh; r++)
+                {
+                    const uchar* tptr = temp->ptr(i + r) + j;
+#if !FIXED_WEIGHT
+                    const float* spw = &space_weight[r * kw];
+#endif
+                    for(int c = 0; c < kw; c++, tptr += cn)
+                        for(int k = 0; k < cn; k++)
+                        {
+#if FIXED_WEIGHT
+                            const float weight = 1.0f;
+#else
+                            const int currWRTCenter = tptr[k] - centre[k];
+                            const float weight = var[k] / ( var[k] + (currWRTCenter * currWRTCenter) ) * spw[c];
+#endif
+                            tmpSum[k]      += (float)tptr[k] * weight;
+                            totalWeight[k] += weight;
+                        }
+                }
+
+                // totalWeight is > 0 here: var and the spatial weights are positive
+                for(int k = 0; k < cn; k++)
+                    drow[j + k] = static_cast<uchar>(tmpSum[k] / totalWeight[k]);
+            }
+        }
+    }
+private:
+    const Mat *temp;
+    Mat *dest;
+    Size ksize;
+    double sigma_space;
+    Point anchor;
+    std::vector<float> space_weight;
+};
+
+// Pads `src` for a ksize window anchored at `anchor` and runs the invoker over
+// all rows. `src` must be CV_8UC1 or CV_8UC3, `dst` must already have the same
+// size and type, and the two must not share data. `anchor` must lie inside the
+// kernel; the invoker asserts this.
+static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType )
+{
+    Size size = src.size();
+
+    CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) &&
+              src.type() == dst.type() && src.size() == dst.size() &&
+              src.data != dst.data );
+    Mat temp;
+    // copyMakeBorder takes (top, bottom, left, right): rows follow anchor.y and
+    // the kernel height, columns follow anchor.x and the kernel width. For a
+    // square kernel with a centred anchor all four values equal ksize/2.
+    copyMakeBorder(src, temp,
+                   anchor.y, ksize.height - 1 - anchor.y,
+                   anchor.x, ksize.width  - 1 - anchor.x,
+                   borderType);
+
+    adaptiveBilateralFilter_8u_Invoker body(dst, temp, ksize, sigmaSpace, anchor);
+    parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
+}
+}
+// Adaptive bilateral filter for 8-bit images with 1 or 3 channels.
+//   _src       - input image, CV_8UC1 or CV_8UC3
+//   _dst       - output image, created with the size and type of _src
+//   ksize      - filter window size
+//   sigmaSpace - spatial sigma
+//   anchor     - anchor inside the window, resolved by normalizeAnchor()
+//   borderType - border mode used when padding the source
+void cv::adaptiveBilateralFilter( InputArray _src, OutputArray _dst, Size ksize,
+                                  double sigmaSpace, Point anchor, int borderType )
+{
+    Mat src = _src.getMat();
+
+    // Validate the arguments before the output is touched, so that a rejected
+    // call does not reallocate the caller's destination.
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
+    anchor = normalizeAnchor(anchor,ksize);
+
+    _dst.create(src.size(), src.type());
+    Mat dst = _dst.getMat();
+
+    if( src.depth() == CV_8U )
+        adaptiveBilateralFilter_8u( src, dst, ksize, sigmaSpace, anchor, borderType );
+    else
+        CV_Error( CV_StsUnsupportedFormat,
+        "Adaptive Bilateral filtering is only implemented for 8u images" );
+}
+
 //////////////////////////////////////////////////////////////////////////////////////////

 CV_IMPL void
--- a/modules/imgproc/test/test_bilateral_filter.cpp
+++ b/modules/imgproc/test/test_bilateral_filter.cpp
@@ -251,7 +251,7 @@ namespace cvtest

    int CV_BilateralFilterTest::validate_test_results(int test_case_index)
    {
-        static const double eps = 1;
+        static const double eps = 4;

        Mat reference_dst, reference_src;
        if (_src.depth() == CV_32F)
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@@ -1424,7 +1424,7 @@ TEST(Imgproc_fitLine_vector_2d, regression)

 TEST(Imgproc_fitLine_Mat_2dC2, regression)
 {
-    cv::Mat mat1(3, 1, CV_32SC2);
+    cv::Mat mat1 = Mat::zeros(3, 1, CV_32SC2);
    std::vector<float> line1;

    cv::fitLine(mat1, line1, CV_DIST_L2, 0 ,0 ,0);
@@ -1444,7 +1444,7 @@ TEST(Imgproc_fitLine_Mat_2dC1, regression)

 TEST(Imgproc_fitLine_Mat_3dC3, regression)
 {
-    cv::Mat mat1(2, 1, CV_32SC3);
+    cv::Mat mat1 = Mat::zeros(2, 1, CV_32SC3);
    std::vector<float> line1;

    cv::fitLine(mat1, line1, CV_DIST_L2, 0 ,0 ,0);
@@ -1454,7 +1454,7 @@ TEST(Imgproc_fitLine_Mat_3dC3, regression)

 TEST(Imgproc_fitLine_Mat_3dC1, regression)
 {
-    cv::Mat mat2(2, 3, CV_32SC1);
+    cv::Mat mat2 = Mat::zeros(2, 3, CV_32SC1);
    std::vector<float> line2;

    cv::fitLine(mat2, line2, CV_DIST_L2, 0 ,0 ,0);
--- a/modules/imgproc/test/test_imgwarp_strict.cpp
+++ b/modules/imgproc/test/test_imgwarp_strict.cpp
@@ -678,8 +678,8 @@ void CV_Remap_Test::generate_test_data()
            MatIterator_<Vec2s> begin_x = mapx.begin<Vec2s>(), end_x = mapx.end<Vec2s>();
            for ( ; begin_x != end_x; ++begin_x)
            {
-                begin_x[0] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.cols + n - 1, 0)));
-                begin_x[1] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.rows + n - 1, 0)));
+                (*begin_x)[0] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.cols + n - 1, 0)));
+                (*begin_x)[1] = static_cast<short>(rng.uniform(static_cast<int>(_n), std::max(src.rows + n - 1, 0)));
            }

            if (interpolation != INTER_NEAREST)
--- a/modules/java/android_test/AndroidManifest.xml
+++ b/modules/java/android_test/AndroidManifest.xml
@@ -3,7 +3,7 @@
      package="org.opencv.test"
      android:versionCode="1"
      android:versionName="1.0">
-    
+
    <uses-sdk android:minSdkVersion="8" />

    <!-- We add an application tag here just so that we can indicate that
@@ -20,7 +20,7 @@
    <instrumentation android:name="org.opencv.test.OpenCVTestRunner"
                     android:targetPackage="org.opencv.test"
                     android:label="Tests for org.opencv"/>
-    
+
    <uses-permission android:name="android.permission.CAMERA"/>
    <uses-feature android:name="android.hardware.camera" />
    <uses-feature android:name="android.hardware.camera.autofocus" />
--- a/modules/java/android_test/res/layout/main.xml
+++ b/modules/java/android_test/res/layout/main.xml
@@ -4,9 +4,9 @@
    android:layout_width="fill_parent"
    android:layout_height="fill_parent"
    >
-<TextView  
-    android:layout_width="fill_parent" 
-    android:layout_height="wrap_content" 
+<TextView
+    android:layout_width="fill_parent"
+    android:layout_height="wrap_content"
    android:text="@string/hello"
    />
 </LinearLayout>
--- a/modules/java/generator/gen_java.py
+++ b/modules/java/generator/gen_java.py
@@ -632,7 +632,8 @@ def getLibVersion(version_hpp_path):
    major = re.search("^W*#\W*define\W+CV_VERSION_MAJOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
    minor = re.search("^W*#\W*define\W+CV_VERSION_MINOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
    revision = re.search("^W*#\W*define\W+CV_VERSION_REVISION\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
-    return (epoch, major, minor, revision)
+    status = re.search("^W*#\W*define\W+CV_VERSION_STATUS\W+\"(.*?)\"\W*$", version_file, re.MULTILINE).group(1)
+    return (epoch, major, minor, revision, status)

 class ConstInfo(object):
    def __init__(self, cname, name, val, addedManually=False):
@@ -799,15 +800,19 @@ public class %(jc)s {
 """ % { 'm' : self.module, 'jc' : jname } )

        if class_name == 'Core':
-            (epoch, major, minor, revision) = getLibVersion(
+            (epoch, major, minor, revision, status) = getLibVersion(
                (os.path.dirname(__file__) or '.') + '/../../core/include/opencv2/core/version.hpp')
-            version_str    = '.'.join( (epoch, major, minor, revision) )
+            version_str    = '.'.join( (epoch, major, minor, revision) ) + status
            version_suffix =  ''.join( (epoch, major, minor) )
            self.classes[class_name].imports.add("java.lang.String")
            self.java_code[class_name]["j_code"].write("""
    public static final String VERSION = "%(v)s", NATIVE_LIBRARY_NAME = "opencv_java%(vs)s";
-    public static final int VERSION_EPOCH = %(ep)s, VERSION_MAJOR = %(ma)s, VERSION_MINOR = %(mi)s, VERSION_REVISION = %(re)s;
-""" % { 'v' : version_str, 'vs' : version_suffix, 'ep' : epoch, 'ma' : major, 'mi' : minor, 're' : revision } )
+    public static final int VERSION_EPOCH = %(ep)s;
+    public static final int VERSION_MAJOR = %(ma)s;
+    public static final int VERSION_MINOR = %(mi)s;
+    public static final int VERSION_REVISION = %(re)s;
+    public static final String VERSION_STATUS = "%(st)s";
+""" % { 'v' : version_str, 'vs' : version_suffix, 'ep' : epoch, 'ma' : major, 'mi' : minor, 're' : revision, 'st': status } )


    def add_class(self, decl):
--- a/modules/matlab/CMakeLists.txt
+++ b/modules/matlab/CMakeLists.txt
@@ -0,0 +1,307 @@
+# ----------------------------------------------------------------------------
+#  CMake file for Matlab/Octave support
+#
+#  Matlab code generation and compilation is broken down into two distinct
+#  stages: configure time and build time. The idea is that we want to give
+#  the user reasonable guarantees that once they type 'make', wrapper
+#  generation is unlikely to fail. Therefore we run a series of tests at
+#  configure time to check the working status of the core components.
+#
+#  Configure Time
+#  During configure time, the script attempts to ascertain whether the
+#  generator and mex compiler are working for a given architecture.
+#  Currently this involves:
+#   1) Generating a simple CV_EXPORTS_W symbol and checking whether a file
+#      of the symbol name is generated
+#   2) Compiling a simple mex gateway to check that Bridge.hpp and mex.h
+#      can be found, and that a file with the mexext is produced
+#
+#  Build Time
+#  If the configure time tests pass, then we assume Matlab wrapper generation
+#  will not fail during build time. We simply glob all of the symbols in
+#  the OpenCV module headers, generate intermediate .cpp files, then compile
+#  them with mex.
+# ----------------------------------------------------------------------------
+
+# PREPEND
+# Prefix every input string with TOKEN and append the results to the list
+# variable named by OUT. The inputs may be given as a single (possibly
+# ;-separated) argument or as several trailing arguments; previously anything
+# after the third argument was silently dropped. This is used for passing
+# command line "-I", "-L" and "-l" arguments to mex. e.g.
+# prepend("-I" OUT /path/to/include/dir) --> -I/path/to/include/dir
+macro(PREPEND TOKEN OUT IN)
+    foreach(VAR ${IN} ${ARGN})
+        list(APPEND ${OUT} "${TOKEN}${VAR}")
+    endforeach()
+endmacro()
+
+
+# WARN_MIXED_PRECISION
+# Emits a CMake warning telling the user that the compiler bitness
+# (COMPILER_BITNESS) differs from the Matlab bitness (MATLAB_BITNESS).
+# Being a macro, it leaves the message text in MSG in the caller's scope.
+macro(WARN_MIXED_PRECISION COMPILER_BITNESS MATLAB_BITNESS)
+    set(MSG
+        "Your compiler is ${COMPILER_BITNESS}-bit but your version of Matlab is ${MATLAB_BITNESS}-bit. To build Matlab bindings, please switch to a ${MATLAB_BITNESS}-bit compiler.")
+    message(WARNING ${MSG})
+endmacro()
+
+# ----------------------------------------------------------------------------
+#  Architecture checks
+# ----------------------------------------------------------------------------
+# make sure we're on a supported architecture with Matlab and python installed
+if (IOS OR ANDROID OR NOT MATLAB_FOUND)
+    ocv_module_disable(matlab)
+    return()
+elseif (NOT PYTHONLIBS_FOUND)
+    message(WARNING "A required dependency of the matlab module (PythonLibs) was not found. Disabling Matlab bindings...")
+    ocv_module_disable(matlab)
+    return()
+endif()
+
+
+# If the user built OpenCV as X-bit, but they have a Y-bit version of Matlab,
+# attempting to link to OpenCV during binding generation will fail, since
+# mixed precision pointers are not allowed. Disable the bindings.
+# MATLAB_ARCH is quoted so that an empty or unset value cannot turn the if()
+# below into a syntax error.
+math(EXPR ARCH "${CMAKE_SIZEOF_VOID_P} * 8")
+if (${ARCH} EQUAL 32 AND "${MATLAB_ARCH}" MATCHES "64")
+    warn_mixed_precision("32" "64")
+    ocv_module_disable(matlab)
+    return()
+elseif (${ARCH} EQUAL 64 AND NOT "${MATLAB_ARCH}" MATCHES "64")
+    warn_mixed_precision("64" "32")
+    ocv_module_disable(matlab)
+    return()
+endif()
+
+# If it's MSVC, warn the user that bindings will only be built in Release mode.
+# Debug mode seems to cause issues...
+if (MSVC)
+    message(STATUS "Warning: Matlab bindings will only be built in Release configurations")
+endif()
+
+
+# ----------------------------------------------------------------------------
+#  Configure time components
+# ----------------------------------------------------------------------------
+set(the_description "The Matlab/Octave bindings")
+ocv_add_module(matlab   BINDINGS
+                        OPTIONAL opencv_core
+                                 opencv_imgproc opencv_ml opencv_highgui
+                                 opencv_objdetect opencv_flann opencv_features2d
+                                 opencv_photo opencv_video opencv_videostab
+                                 opencv_calib opencv_calib3d
+                                 opencv_stitching opencv_superres
+                                 opencv_nonfree
+)
+
+# get the commit information
+execute_process(COMMAND git log -1 --pretty=%H OUTPUT_VARIABLE GIT_COMMIT ERROR_QUIET)
+string(REGEX REPLACE "(\r?\n)+$" "" GIT_COMMIT "${GIT_COMMIT}")
+
+# set the path to the C++ header and doc parser, and template engine
+set(JINJA2_PATH ${CMAKE_SOURCE_DIR}/3rdparty)
+set(HDR_PARSER_PATH ${CMAKE_SOURCE_DIR}/modules/python/src2)
+set(RST_PARSER_PATH ${CMAKE_SOURCE_DIR}/modules/java/generator)
+
+# set mex compiler options
+prepend("-I" MEX_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include)
+prepend("-L" MEX_LIB_DIR  ${LIBRARY_OUTPUT_PATH}/$(Configuration))
+set(MEX_OPTS "-largeArrayDims")
+
+if (BUILD_TESTS)
+    add_subdirectory(test)
+endif()
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
+
+
+# intersection of available modules and optional dependencies
+# 1. populate the command-line include directories (-I/path/to/module/header, ...)
+# 2. populate the command-line link libraries (-lopencv_core, ...) for Debug and Release
+set(MATLAB_DEPS ${OPENCV_MODULE_${the_module}_REQ_DEPS} ${OPENCV_MODULE_${the_module}_OPT_DEPS})
+foreach(opencv_module ${MATLAB_DEPS})
+    if (HAVE_${opencv_module})
+        string(REPLACE "opencv_" "" module ${opencv_module})
+        list(APPEND opencv_modules ${module})
+        list(APPEND ${the_module}_ACTUAL_DEPS ${opencv_module})
+        prepend("-I" MEX_INCLUDE_DIRS "${OPENCV_MODULE_${opencv_module}_LOCATION}/include")
+        prepend("-l" MEX_LIBS ${opencv_module}${OPENCV_DLLVERSION})
+        prepend("-l" MEX_DEBUG_LIBS ${opencv_module}${OPENCV_DLLVERSION}${OPENCV_DEBUG_POSTFIX})
+    endif()
+endforeach()
+
+# add extra headers by hand
+list(APPEND opencv_extra_hdrs "core=${OPENCV_MODULE_opencv_core_LOCATION}/include/opencv2/core/base.hpp")
+list(APPEND opencv_extra_hdrs "video=${OPENCV_MODULE_opencv_video_LOCATION}/include/opencv2/video/tracking.hpp")
+
+# pass the OPENCV_CXX_EXTRA_FLAGS through to the mex compiler
+# remove the visibility modifiers, so the mex gateway is visible
+# TODO: get mex working without warnings
+string(REGEX REPLACE "[^\ ]*visibility[^\ ]*" "" MEX_CXXFLAGS "${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")
+
+# Configure checks
+# Check to see whether the generator and the mex compiler are working.
+# The checks currently test:
+#   - whether the python generator can be found
+#   - whether the python generator correctly outputs a file for a definition
+#   - whether the mex compiler can find the required headers
+#   - whether the mex compiler can compile a trivial definition
+if (NOT MEX_WORKS)
+    # attempt to generate a gateway for a function
+    message(STATUS "Trying to generate Matlab code")
+    execute_process(
+        COMMAND ${PYTHON_EXECUTABLE}
+                ${CMAKE_CURRENT_SOURCE_DIR}/generator/gen_matlab.py
+                --jinja2    ${JINJA2_PATH}
+                --hdrparser ${HDR_PARSER_PATH}
+                --rstparser ${RST_PARSER_PATH}
+                --extra     "test=${CMAKE_CURRENT_SOURCE_DIR}/test/test_generator.hpp"
+                --outdir    ${CMAKE_BINARY_DIR}/junk
+        RESULT_VARIABLE GEN_RESULT
+        ERROR_VARIABLE GEN_ERROR
+        OUTPUT_QUIET
+    )
+
+    # the generator is expected to run silently: a non-zero exit status or
+    # anything written to stderr counts as a failure
+    if (GEN_RESULT OR GEN_ERROR)
+        if (GEN_ERROR)
+            message("${GEN_ERROR}")
+        endif()
+        message(STATUS "Error generating Matlab code. Disabling Matlab bindings...")
+        ocv_module_disable(matlab)
+        return()
+    else()
+        message(STATUS "Trying to generate Matlab code - OK")
+    endif()
+
+    # attempt to compile a gateway using mex
+    # (the flags variable is MEX_CXXFLAGS; the previously used MEX_CXX_FLAGS
+    # is never defined, so no flags reached the compiler)
+    message(STATUS "Trying to compile mex file")
+    execute_process(
+        COMMAND ${MATLAB_MEX_SCRIPT} ${MEX_OPTS} "CXXFLAGS=\$CXXFLAGS ${MEX_CXXFLAGS}"
+                ${MEX_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/test/test_compiler.cpp
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/junk
+        RESULT_VARIABLE MEX_RESULT
+        ERROR_VARIABLE MEX_ERROR
+        OUTPUT_QUIET
+    )
+
+    # Success is decided by the exit status: with the OpenCV warning flags
+    # forwarded, the compiler may print warnings on stderr for a build that
+    # succeeded.
+    if (MEX_RESULT)
+        if (MEX_ERROR)
+            message("${MEX_ERROR}")
+        endif()
+        message(STATUS "Error compiling mex file. Disabling Matlab bindings...")
+        ocv_module_disable(matlab)
+        return()
+    else()
+        if (MEX_ERROR)
+            message(STATUS "mex compiler diagnostics:\n${MEX_ERROR}")
+        endif()
+        message(STATUS "Trying to compile mex file - OK")
+    endif()
+endif()
+
+# if we make it here, mex works!
+# Remember the result in the cache so the checks above are skipped on later
+# configure runs. The fourth argument of set(... CACHE ...) is the docstring,
+# so hiding the entry from the default cache view needs mark_as_advanced().
+set(MEX_WORKS True CACHE BOOL "The Matlab generator and mex compiler passed the configure-time checks")
+mark_as_advanced(MEX_WORKS)
+
+
+# ----------------------------------------------------------------------------
+#  Build time components
+# ----------------------------------------------------------------------------
+
+# proxies
+# these proxies are used to trigger the add_custom_commands
+# (which do the real work) only when they're outdated
+set(GENERATE_PROXY ${CMAKE_CURRENT_BINARY_DIR}/generate.proxy)
+set(COMPILE_PROXY ${CMAKE_CURRENT_BINARY_DIR}/compile.proxy)
+# TODO: Remove following line before merging with master
+# NOTE(review): this TODO is stale. Deleting both proxy files on every
+# configure run means the generate and compile steps run again on the next
+# build. Before dropping the line, confirm that the custom commands producing
+# the proxies declare the dependencies needed to re-run on their own.
+file(REMOVE ${GENERATE_PROXY} ${COMPILE_PROXY})
+
+# generate
+# call the python executable to generate the Matlab gateways
+add_custom_command(
+    OUTPUT ${GENERATE_PROXY}
+    COMMAND ${PYTHON_EXECUTABLE}
+            ${CMAKE_CURRENT_SOURCE_DIR}/generator/gen_matlab.py
+            --jinja2     ${JINJA2_PATH}
+            --hdrparser  ${HDR_PARSER_PATH}
+            --rstparser  ${RST_PARSER_PATH}
+            --moduleroot ${CMAKE_SOURCE_DIR}/modules
+            --modules    ${opencv_modules}
+            --extra      ${opencv_extra_hdrs}
+            --outdir     ${CMAKE_CURRENT_BINARY_DIR}
+    COMMAND ${PYTHON_EXECUTABLE}
+            ${CMAKE_CURRENT_SOURCE_DIR}/generator/build_info.py
+            --jinja2         ${JINJA2_PATH}
+            --os             ${CMAKE_SYSTEM}
+            --arch           ${ARCH} ${CMAKE_SYSTEM_PROCESSOR}
+            --compiler       ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}
+            --mex_arch       ${MATLAB_ARCH}
+            --mex_script     ${MATLAB_MEX_SCRIPT}
+            --cxx_flags      ${MEX_CXXFLAGS}
+            --opencv_version ${OPENCV_VERSION}
+            --commit         ${GIT_COMMIT}
+            --modules        ${opencv_modules}
+            --configuration  "$(Configuration)" ${CMAKE_BUILD_TYPE}
+            --outdir         ${CMAKE_CURRENT_BINARY_DIR}
+    COMMAND ${PYTHON_EXECUTABLE}
+            ${CMAKE_CURRENT_SOURCE_DIR}/generator/cvmex.py
+            --jinja2 ${JINJA2_PATH}
+            --opts="${MEX_OPTS}"
+            --include_dirs="${MEX_INCLUDE_DIRS}"
+            --lib_dir=${MEX_LIB_DIR}
+            --libs="${MEX_LIBS}"
+            --flags  ${MEX_CXXFLAGS}
+            --outdir ${CMAKE_CURRENT_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/test/help.m ${CMAKE_CURRENT_BINARY_DIR}/+cv
+    COMMAND ${CMAKE_COMMAND} -E touch ${GENERATE_PROXY}
+    COMMENT "Generating Matlab source files"
+)
+
+# compile
+# call the mex compiler to compile the gateways
+# because we don't know the source files at configure-time, this
+# has to be executed in a separate script in cmake's script processing mode
+# The C++ flags live in MEX_CXXFLAGS; the previously referenced MEX_CXX_FLAGS
+# is never defined, so the script always received an empty value.
+add_custom_command(
+    OUTPUT ${COMPILE_PROXY}
+    COMMAND ${CMAKE_COMMAND} -DMATLAB_MEX_SCRIPT=${MATLAB_MEX_SCRIPT}
+                             -DMATLAB_MEXEXT=${MATLAB_MEXEXT}
+                             -DMEX_OPTS=${MEX_OPTS}
+                             -DMEX_CXXFLAGS="${MEX_CXXFLAGS}"
+                             -DMEX_INCLUDE_DIRS="${MEX_INCLUDE_DIRS}"
+                             -DMEX_LIB_DIR=${MEX_LIB_DIR}
+                             -DCONFIGURATION="$(Configuration)"
+                             -DMEX_LIBS="${MEX_LIBS}"
+                             -DMEX_DEBUG_LIBS="${MEX_DEBUG_LIBS}"
+                             -P ${CMAKE_CURRENT_SOURCE_DIR}/compile.cmake
+    COMMAND ${CMAKE_COMMAND} -E touch ${COMPILE_PROXY}
+    COMMENT "Compiling Matlab source files. This could take a while..."
+)
+
+# targets
+# opencv_matlab_sources --> opencv_matlab
+add_custom_target(${the_module}_sources ALL DEPENDS ${GENERATE_PROXY})
+add_custom_target(${the_module} ALL DEPENDS ${COMPILE_PROXY})
+add_dependencies(${the_module} ${the_module}_sources ${${the_module}_ACTUAL_DEPS})
+
+if (ENABLE_SOLUTION_FOLDERS)
+    set_target_properties(${the_module} PROPERTIES FOLDER "modules")
+endif()
+
+
+# ----------------------------------------------------------------------------
+#  Install time components
+# ----------------------------------------------------------------------------
+# NOTE: Trailing slashes on the DIRECTORY paths are important!
+# TODO: What needs to be done with rpath????
+
+# install the +cv directory verbatim
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH})
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/+cv/     DESTINATION matlab/+cv)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/cv.m         DESTINATION matlab)
+
+# update the custom mex compiler to point to the install locations
+# The values are spliced into the install(CODE ...) script text below, so list
+# separators and blanks are replaced by escaped spaces to keep each option a
+# single argument. All four inputs are quoted: an unquoted reference would be
+# split on ';' (and joined back without a separator) or, if empty, leave
+# string(REPLACE) without an input argument.
+string(REPLACE ";" "\\ " MEX_OPTS "${MEX_OPTS}")
+string(REPLACE ";" "\\ " MEX_LIBS "${MEX_LIBS}")
+string(REPLACE " " "\\ " MEX_CXXFLAGS "${MEX_CXXFLAGS}")
+string(REPLACE ";" "\\ " MEX_INCLUDE_DIRS "${MEX_INCLUDE_DIRS}")
+install(CODE
+    "execute_process(
+    COMMAND ${PYTHON_EXECUTABLE}
+            ${CMAKE_CURRENT_SOURCE_DIR}/generator/cvmex.py
+            --jinja2 ${JINJA2_PATH}
+            --opts=${MEX_OPTS}
+            --include_dirs=-I${CMAKE_INSTALL_PREFIX}/${OPENCV_INCLUDE_INSTALL_PATH}
+            --lib_dir=-L${CMAKE_INSTALL_PREFIX}/${OPENCV_LIB_INSTALL_PATH}
+            --libs=${MEX_LIBS}
+            --flags=${MEX_CXXFLAGS}
+            --outdir ${CMAKE_INSTALL_PREFIX}/matlab
+    )"
+)
--- a/modules/matlab/LICENSE
+++ b/modules/matlab/LICENSE
@@ -0,0 +1,42 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this
+//  license. If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote
+//     products derived from this software without specific prior written
+//     permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and any express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular purpose
+// are disclaimed. In no event shall the Intel Corporation or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or business
+// interruption) however caused and on any theory of liability, whether in
+// contract, strict liability, or tort (including negligence or otherwise)
+// arising in any way out of the use of this software, even if advised of the
+// possibility of such damage.
+//
+////////////////////////////////////////////////////////////////////////////////
--- a/modules/matlab/README.md
+++ b/modules/matlab/README.md
@@ -0,0 +1,394 @@
+OpenCV Matlab Code Generator
+============================
+This module contains a code generator to automatically produce Matlab mex wrappers for other modules within the OpenCV library. Once compiled and added to the Matlab path, this gives users the ability to call OpenCV methods natively from within Matlab.
+
+
+Build
+-----
+The Matlab code generator is fully integrated into the OpenCV build system. If cmake finds a Matlab installation available on the host system while configuring OpenCV, it will attempt to generate Matlab wrappers for all OpenCV modules. If cmake is having trouble finding your Matlab installation, you can explicitly point it to the root by defining the `MATLAB_ROOT_DIR` variable. For example, on a Mac you could type:
+
+    cmake -DMATLAB_ROOT_DIR=/Applications/MATLAB_R2013a.app ..
+
+
+Install
+-------
+In order to use the bindings, you will need to add them to the Matlab path. The path to add is:
+
+1. `${CMAKE_BINARY_DIR}/modules/matlab` if you are working from the build tree, or
+2. `${CMAKE_INSTALL_PREFIX}/matlab` if you have installed OpenCV
+
+In Matlab, simply run:
+
+    addpath('/path/to/opencv/matlab/');
+
+
+Run
+---
+Once you've added the bindings directory to the Matlab path, you can start using them straight away! OpenCV calls need to be prefixed with a 'cv' qualifier, to disambiguate them from Matlab methods of the same name. For example, to compute the dft of a matrix, you might do the following:
+
+```matlab
+% load an image (Matlab)
+I = imread('cameraman.tif');
+
+% compute the DFT (OpenCV)
+If = cv.dft(I, cv.DFT_COMPLEX_OUTPUT);
+```
+
+As you can see, both OpenCV methods and constants can be used with 'cv' qualification. You can also call:
+
+    help cv.dft
+
+to get help on the purpose and call signature of a particular method, or
+
+    help cv
+
+to get general help regarding the OpenCV bindings. If you ever run into issues with the bindings
+
+    cv.buildInformation();
+
+will produce a printout of diagnostic information pertaining to your particular build of OS, OpenCV and Matlab. It is useful to submit this information alongside a bug report to the OpenCV team.
+
+Writing your own mex files
+--------------------------
+The Matlab bindings come with a set of utilities to help you quickly write your own mex files using OpenCV definitions. By doing so, you have all the speed and freedom of C++, with the power of OpenCV's math expressions and optimizations.
+
+The first thing you need to learn how to do is write a mex-file with Matlab constructs. Following is a brief example:
+
+```cpp
+// include useful constructs
+// (this automatically includes opencv core.hpp and mex.h)
+#include <opencv2/matlab/bridge.hpp>
+using namespace cv;
+using namespace matlab;
+using namespace bridge;
+
+// define the mex gateway
+void mexFunction(int nlhs, mxArray* plhs[],
+                 int nrhs, const mxArray* prhs[]) {
+
+  // claim the inputs into scoped management
+  MxArrayVector raw(prhs, prhs+nrhs);
+
+  // add an argument parser to automatically handle basic options
+  ArgumentParser parser("my function");
+  parser.addVariant(1, 1, "opt");
+  MxArrayVector reordered = parser.parse(raw);
+
+  // if we get here, we know the inputs are valid and reordered. Unpack...
+  BridgeVector inputs(reordered.begin(), reordered.end());
+  Mat required    = inputs[0].toMat();
+  string optional = inputs[1].empty() ? "Default string" : inputs[1].toString();
+
+  try {
+    // Do stuff...
+  } catch(Exception& e) {
+    error(e.what());
+  } catch(...) {
+    error("Uncaught exception occurred");
+  }
+
+  // allocate an output
+  Bridge out = required;
+  plhs[0] = out.toMxArray().releaseOwnership();
+}
+```
+
+There are a couple of important things going on in this example. Firstly, you need to include `<opencv2/matlab/bridge.hpp>` to enable the bridging capabilities. Once you've done this, you get some nice utilities for free. `MxArray` is a class that wraps Matlab's `mxArray*` class in an OOP-style interface. `ArgumentParser` is a class that handles default, optional and named arguments for you, along with multiple possible calling syntaxes. Finally, `Bridge` is a class that allows bidirectional conversions between OpenCV/std and Matlab types.
+
+Once you have written your file, it can be compiled with the provided mex utility:
+
+    cv.mex('my_function.cpp');
+
+This utility automatically links in all of the necessary OpenCV libraries to make your function work.
+
+NOTE: OpenCV uses exceptions throughout the codebase. It is a **very** good idea to wrap your code in exception handlers to avoid crashing Matlab in the event of an exception being thrown.
+
+------------------------------------------------------------------
+
+
+Developer
+=========
+The following sections contain information for developers seeking to use, understand or extend the Matlab bindings. The bindings are generated in python using a powerful templating engine called Jinja2. Because Matlab mex gateways have a common structure, they are well suited to templatization. There are separate templates for formatting C++ classes, Matlab classes, C++ functions, constants (enums) and documentation.
+
+The task of the generator is two-fold:
+
+1. To parse the OpenCV headers and build a semantic tree that can be fed to the template engine
+2. To define type conversions between C++/OpenCV and Matlab types
+
+Once a source file has been generated for each OpenCV definition, and type conversions have been established, the mex compiler is invoked to produce the mex gateway (shared object) and link in the OpenCV libraries.
+
+
+File layout
+-----------
+opencv/modules/matlab (this module)
+
+* `CMakeLists.txt` (main cmake configuration file)
+* `README.md` (this file)
+* `compile.cmake` (the cmake script for compiling generated source code)
+* `generator` (the folder containing generator code)
+  * `filters.py` (template filters)
+  * `gen_matlab.py` (the binding generator control script)
+  * `parse_tree.py` (python class to refactor the hdr_parser.py output)
+  * `templates` (the raw templates for populating classes, constants, functions and docs)
+* `include` (C++ headers for the bindings)
+  * `mxarray.hpp` (C++ OOP-style interface for Matlab mxArray* class)
+  * `bridge.hpp` (type conversions)
+  * `map.hpp` (hash map interface for instance storage and method lookup)
+* `test` (generator, compiler and binding test scripts)
+
+
+Call Tree
+---------
+The cmake call tree can be broken into 3 main components:
+
+1. configure time
+2. build time
+3. install time
+
+**Find Matlab (configure)**
+The first thing to do is discover a Matlab installation on the host system. This is handled by the `OpenCVFindMatlab.cmake` in `opencv/cmake`. On Windows machines it searches the registry and path, while on *NIX machines it searches a set of canonical install paths. Once Matlab has been found, a number of variables are defined, such as the path to the mex compiler, the mex libraries, the mex include paths, the architectural extension, etc.
+
+**Test the generator (configure)**
+Attempt to produce a source file for a simple definition. This tests whether python and pythonlibs are correctly invoked on the host.
+
+**Test the mex compiler (configure)**
+Attempt to compile a simple definition using the mex compiler. A mex file is actually just a shared object with a special exported symbol `_mexFunction` which serves as the entry-point to the function. As such, the mex compiler is just a set of scripts configuring the system compiler. In most cases this is the same as the OpenCV compiler, but *could* be different. The test checks whether the mex and generator includes can be found, the system libraries can be linked and the passed compiler flags are compatible.
+
+If any of the configure time tests fail, the bindings will be disabled, but the main OpenCV configure will continue without error. The configuration summary will contain the block:
+
+  Matlab
+    mex:          /Applications/MATLAB_R2013a.app/bin/mex
+    compiler/generator:    Not working (bindings will not be generated)
+
+**Generate the sources (build)**
+Given a set of modules (the intersection of the OpenCV modules being built and the matlab module optional dependencies), the `CppHeaderParser()` from `opencv/modules/python/src2/hdr_parser.py` will parse the module headers and produce a set of definitions.
+
+The `ParseTree()` from `opencv/modules/matlab/generator/parse_tree.py` takes this set of definitions and refactors them into a semantic tree better suited to templatization. For example, a trivial definition from the header parser may look something like:
+
+```python
+[fill, void, ['/S'], [cv::Mat&, mat, '', ['/I', '/O']]]
+```
+
+The equivalent refactored output will look like:
+
+```python
+  Function
+    name   = 'fill'
+    rtype  = 'void'
+    static = True
+    req =
+      Argument
+        name    = 'mat'
+        type    = 'cv::Mat'
+        ref     = '&'
+        I       = True
+        O       = True
+        default = ''
+```
+
+The added semantics (Namespace, Class, Function, Argument, name, etc) make it easier for the templating engine to parse, slice and populate definitions.
+
+Once the definitions have been parsed, `gen_matlab.py` passes each definition to the template engine with the appropriate template (class, function, enum, doc) and the filled template gets written to the `${CMAKE_CURRENT_BINARY_DIR}/src` directory.
+
+The generator relies upon a proxy object called `generate.proxy` to determine when the sources are out of date and need to be re-generated.
+
+**Compile the sources (build)**
+Once the sources have been generated, they are compiled by the mex compiler. The `compile.cmake` script in `opencv/modules/matlab/` takes responsibility for iterating over each source file in `${CMAKE_CURRENT_BINARY_DIR}/src` and compiling it with the passed includes and OpenCV libraries.
+
+The flags used to compile the main OpenCV libraries are also forwarded to the mex compiler. So if, for example, you compiled OpenCV with SSE support, the mex bindings will also use SSE. Likewise, if you compile OpenCV in debug mode, the bindings will link to the debug version of the libraries.
+
+Importantly, the mex compiler includes the `mxarray.hpp`, `bridge.hpp` and `map.hpp` files from the `opencv/modules/matlab/include` directory. `mxarray.hpp` defines a `MxArray` class which wraps Matlab's `mxArray*` type in a more friendly OOP-style interface. `bridge.hpp` defines a `Bridge` class which is able to perform type conversions between Matlab types and std/OpenCV types. It can be extended with new definitions using the plugin interface described in that file.
+
+The compiler relies upon a proxy object called `compile.proxy` to determine when the generated sources are out of date and need to be re-compiled.
+
+**Install the files (install)**
+At install time, the mex files are put into place at `${CMAKE_INSTALL_PREFIX}/matlab` and their linkages updated.
+
+
+Jinja2
+------
+Jinja2 is a powerful templating engine, similar to python's builtin `string.Template` class but implementing the model-view-controller paradigm. For example, a trivial view could be populated as follows:
+
+**view.py**
+
+```html+django
+<title>{{ title }}</title>
+<ul>
+{% for user in users %}
+  <li><a href="{{ user.url }}">{{ user.username | sanitize }}</a></li>
+{% endfor %}
+</ul>
+```
+
+**model.py**
+
+```python
+class User(object):
+  def __init__(self):
+    self.username = ''
+    self.url = ''
+
+def sanitize(text):
+  """Filter for escaping html tags to prevent code injection"""
+```
+
+**controller.py**
+
+```python
+def populate(users):
+  # initialize jinja
+  jtemplate = jinja2.Environment(loader=FileSystemLoader())
+
+  # add the filters to the engine
+  jtemplate.filters['sanitize'] = sanitize
+
+  # get the view
+  template = jtemplate.get_template('view')
+
+  # populate the template with a list of User objects
+  populated = template.render(title='all users', users=users)
+
+  # write to file
+  with open('users.html', 'wb') as f:
+    f.write(populated)
+```
+
+Thus the style and layout of the view is kept separate from the content (model). This modularity improves readability and maintainability of both the view and content and (for my own sanity) has helped significantly in debugging errors.
+
+File Reference
+--------------
+**gen_matlab.py**
+gen_matlab has the following call signature:
+
+  gen_matlab.py --jinja2 path/to/jinja2/engine
+          --hdrparser path/to/hdr_parser/dir
+          --rstparser path/to/rst_parser/dir
+          --moduleroot path/to/opencv/modules
+          --modules [core imgproc highgui ...]
+          --extra namespace=/additional/header/to/parse
+          --outdir /path/to/place/generated/src
+
+**build_info.py**
+build_info has the following call signature:
+
+  build_info.py --jinja2 path/to/jinja2/engine
+          --os operating_system_string
+          --arch [bitness processor]
+          --compiler [id version]
+          --mex_arch arch_string
+          --mex_script /path/to/mex/script
+          --cxx_flags [-list -of -flags -to -passthrough]
+          --opencv_version version_string
+          --commit commit_hash_if_using_git
+          --modules core imgproc highgui etc
+          --configuration Debug/Release
+          --outdir path/to/place/build/info
+
+**cvmex.py**
+cvmex.py, the custom compiler generator, has the following call signature:
+
+  cvmex.py --jinja2 path/to/jinja2/engine
+          --opts [-list -of -opts]
+          --include_dirs [-list -of -opencv_include_directories]
+          --lib_dir opencv_lib_directory
+          --libs [-lopencv_core -lopencv_imgproc ...]
+          --flags [-Wall -opencv_build_flags ...]
+          --outdir /path/to/generated/output
+
+**parse_tree.py**
+To build a parse tree, first parse a set of headers, then invoke the parse tree to refactor the output:
+
+```python
+# parse a set of definitions into a dictionary of namespaces
+parser = CppHeaderParser()
+ns['core'] = parser.parse('path/to/opencv/core.hpp')
+
+# refactor into a semantic tree
+parse_tree = ParseTree()
+parse_tree.build(ns)
+
+# iterate over the tree
+for namespace in parse_tree.namespaces:
+  for clss in namespace.classes:
+    # do stuff
+  for method in namespace.methods:
+    # do stuff
+```
+
+**mxarray.hpp**
+mxarray.hpp defines a class called `MxArray` which provides an OOP-style interface for Matlab's homogeneous `mxArray*` type. To create an `MxArray`, you can either inherit an existing array
+
+```cpp
+MxArray mat(prhs[0]);
+```
+
+or create a new array
+
+```cpp
+MxArray mat(5, 5, Matlab::Traits<double>::ScalarType);
+MxArray mat = MxArray::Matrix<double>(5, 5);
+```
+
+The default constructor allocates a `0 x 0` array. Once you have encapsulated an `mxArray*` you can access its properties through member functions:
+
+```cpp
+mat.rows();
+mat.cols();
+mat.size();
+mat.channels();
+mat.isComplex();
+mat.isNumeric();
+mat.isLogical();
+mat.isClass();
+mat.className();
+mat.real();
+mat.imag();
+```
+
+The MxArray object uses scoped memory management. If you wish to pass an MxArray back to Matlab (as a lhs pointer), you need to explicitly release ownership of the array so that it is not destroyed when it leaves scope:
+
+```cpp
+plhs[0] = mat.releaseOwnership();
+```
+
+mxarray.hpp also includes a number of helper utilities that make working in mex-world a little easier. One such utility is the `ArgumentParser`. `ArgumentParser` automatically handles required and optional arguments to a method, and even enables named arguments as used in many core Matlab functions. For example, if you had a function with the following signature:
+
+```cpp
+void f(Mat first, Mat second, Mat mask=Mat(), int dtype=-1);
+```
+
+then you can create an `ArgumentParser` as follows:
+
+```cpp
+ArgumentParser parser("f");
+parser.addVariant(2, 2, "mask", "dtype");
+MxArrayVector inputs = parser.parse(prhs, prhs+nrhs);
+```
+
+and that will make available the following calling syntaxes:
+
+```matlab
+f(first, second);
+f(first, second, mask);
+f(first, second, mask, dtype);
+f(first, second, 'dtype', dtype, 'mask', mask); % optional ordering does not matter
+f(first, second, 'dtype', dtype); % only second optional argument provided
+f(first, second, mask, 'dtype', dtype); % mixture of ordered and named
+```
+
+Further, the output of the `parser.parse()` method will always contain the total number of required and optional arguments that the method can take, with unspecified arguments given by empty matrices. Thus, to check if an optional argument has been given, you can do:
+
+```cpp
+int dtype = inputs[3].empty() ? -1 : inputs[3].scalar<double>();
+```
+
+**bridge.hpp**
+The bridge interface defines a `Bridge` class which provides type conversion between std/OpenCV and Matlab types. A type conversion must provide the following:
+
+```cpp
+Bridge& operator=(const MyObject&);
+MyObject toMyObject();
+operator MyObject();
+```
+
+The binding generator will then automatically call the conversion operators (either explicitly or implicitly) if your `MyObject` class is encountered as an input or return from a parsed definition.
--- a/modules/matlab/compile.cmake
+++ b/modules/matlab/compile.cmake
@@ -0,0 +1,49 @@
+# LISTIFY
+# Given a string of space-delimited tokens, reparse as a string of
+# semi-colon delimited tokens, which in CMake land is exactly equivalent
+# to a list
+#   OUT_LIST  - name of the variable that receives the resulting list
+#   IN_STRING - the space-delimited string to convert
+# IN_STRING is forwarded quoted so that string(REPLACE) always receives
+# exactly one input argument, even when the value is empty or already
+# contains semi-colons
+macro(listify OUT_LIST IN_STRING)
+    string(REPLACE " " ";" ${OUT_LIST} "${IN_STRING}")
+endmacro()
+
+# listify multiple-argument inputs
+# (values are passed quoted so that an empty or semi-colon separated variable
+# still arrives as the single IN_STRING argument)
+listify(MEX_INCLUDE_DIRS_LIST "${MEX_INCLUDE_DIRS}")
+if ("${CONFIGURATION}" MATCHES "Debug")
+    listify(MEX_LIBS_LIST "${MEX_DEBUG_LIBS}")
+else()
+    listify(MEX_LIBS_LIST "${MEX_LIBS}")
+endif()
+
+# if this is a Debug configuration, don't build bindings
+# (the test looks at CONFIGURATION only; the compiler is not inspected)
+if ("${CONFIGURATION}" MATCHES "Debug")
+    message(STATUS "Matlab bindings are only available in Release configurations. Skipping...")
+    return()
+endif()
+
+# -----------------------------------------------------------------------------
+# Compile
+# -----------------------------------------------------------------------------
+# for each generated source file:
+# 1. check if the file has already been compiled
+# 2. attempt compile if required
+# 3. if the compile fails, throw an error and cancel compilation
+file(GLOB SOURCE_FILES "${CMAKE_CURRENT_BINARY_DIR}/src/*.cpp")
+foreach(SOURCE_FILE ${SOURCE_FILES})
+    # strip out the filename
+    get_filename_component(FILENAME ${SOURCE_FILE} NAME_WE)
+    # compile the source file using mex
+    # (skipped when a mex file of the same name already exists in +cv)
+    if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/+cv/${FILENAME}.${MATLAB_MEXEXT})
+        # the mex script runs inside the +cv directory; its stdout is
+        # discarded and anything it writes to stderr is captured in FAILED
+        execute_process(
+            COMMAND ${MATLAB_MEX_SCRIPT} ${MEX_OPTS} "CXXFLAGS=\$CXXFLAGS ${MEX_CXXFLAGS}" ${MEX_INCLUDE_DIRS_LIST}
+                    ${MEX_LIB_DIR} ${MEX_LIBS_LIST} ${SOURCE_FILE}
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/+cv
+            OUTPUT_QUIET
+            ERROR_VARIABLE FAILED
+        )
+    endif()
+    # TODO: If a mex file fails to compile, should we error out?
+    # TODO: Warnings are currently treated as errors...
+    # any non-empty stderr capture aborts the whole script; the exit status of
+    # the mex script itself is not examined
+    if (FAILED)
+        message(FATAL_ERROR "Failed to compile ${FILENAME}: ${FAILED}")
+    endif()
+endforeach()
--- a/modules/matlab/generator/build_info.py
+++ b/modules/matlab/generator/build_info.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+def substitute(build, output_dir):
+    """Render template_build_info.m to <output_dir>/+cv/buildInformation.m.
+
+    build      -- object handed to the template under the name 'build'
+    output_dir -- directory in which the '+cv' directory is created if missing
+
+    The names os, time, Environment, FileSystemLoader, csv and
+    stripExtraSpaces are read from module globals; in this file they are
+    imported inside the __main__ block only.
+    """
+
+    # template engine rooted at the 'templates' directory beside this script
+    loader = FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates'))
+    engine = Environment(loader=loader, trim_blocks=True, lstrip_blocks=True)
+
+    # register the filters
+    for label, function in (('csv', csv), ('stripExtraSpaces', stripExtraSpaces)):
+        engine.filters[label] = function
+
+    # fetch the template before touching the file system
+    view = engine.get_template('template_build_info.m')
+
+    # make sure the package directory exists
+    package_dir = output_dir+'/+cv'
+    if not os.path.isdir(package_dir):
+        os.mkdir(package_dir)
+
+    # fill in the template and write it out
+    rendered = view.render(build=build, time=time)
+    target = os.path.join(package_dir, 'buildInformation.m')
+    with open(target, 'wb') as f:
+        f.write(rendered)
+
+if __name__ == "__main__":
+    # usage notes for the command line interface (a bare string expression)
+    """
+    Usage: python build_info.py --jinja2 /path/to/jinja2/engine
+                                --os os_version_string
+                                --arch [bitness processor]
+                                --compiler [id version]
+                                --mex_arch arch_string
+                                --mex_script /path/to/mex/script
+                                --cxx_flags [-list -of -flags -to -passthrough]
+                                --opencv_version version_string
+                                --commit commit_hash_if_using_git
+                                --modules [core imgproc highgui etc]
+                                --configuration Debug/Release
+                                --outdir /path/to/write/build/info
+
+    build_info.py generates a Matlab function that can be invoked with a call to
+      >> cv.buildInformation();
+
+    This function prints a summary of the user's OS, OpenCV and Matlab build
+    given the information passed to this module. build_info.py invokes Jinja2
+    on the template_build_info.m template.
+    """
+
+    # parse the input options
+    # (os and time imported here are the module globals substitute() reads)
+    import sys, re, os, time
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    parser.add_argument('--jinja2')
+    parser.add_argument('--os')
+    parser.add_argument('--arch', nargs=2)
+    parser.add_argument('--compiler', nargs='+')
+    parser.add_argument('--mex_arch')
+    parser.add_argument('--mex_script')
+    # --mex_opts is accepted although the usage text above does not list it
+    parser.add_argument('--mex_opts', default=['-largeArrayDims'], nargs='*')
+    parser.add_argument('--cxx_flags', default=[], nargs='*')
+    parser.add_argument('--opencv_version', default='', nargs='?')
+    parser.add_argument('--commit', default='Not in working git tree', nargs='?')
+    parser.add_argument('--modules', nargs='+')
+    parser.add_argument('--configuration')
+    parser.add_argument('--outdir')
+    build = parser.parse_args()
+
+    # add jinja to the path
+    sys.path.append(build.jinja2)
+
+    # imported only after the jinja2 location has been appended to sys.path
+    from filters import *
+    from jinja2 import Environment, FileSystemLoader
+
+    # populate the build info template
+    substitute(build, build.outdir)
--- a/modules/matlab/generator/cvmex.py
+++ b/modules/matlab/generator/cvmex.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+def substitute(cv, output_dir):
+    """Render template_cvmex_base.m to <output_dir>/+cv/mex.m.
+
+    cv         -- object handed to the template under the name 'cv'
+    output_dir -- directory in which the '+cv' directory is created if missing
+
+    The names os, time, Environment, FileSystemLoader, cellarray, split and
+    csv are read from module globals; in this file they are imported inside
+    the __main__ block only.
+    """
+
+    # template engine rooted at the 'templates' directory beside this script
+    loader = FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates'))
+    engine = Environment(loader=loader, trim_blocks=True, lstrip_blocks=True)
+
+    # register the filters
+    for label, function in (('cellarray', cellarray), ('split', split), ('csv', csv)):
+        engine.filters[label] = function
+
+    # fetch the template before touching the file system
+    view = engine.get_template('template_cvmex_base.m')
+
+    # make sure the package directory exists
+    package_dir = output_dir+'/+cv'
+    if not os.path.isdir(package_dir):
+        os.mkdir(package_dir)
+
+    # fill in the template and write it out
+    rendered = view.render(cv=cv, time=time)
+    target = os.path.join(package_dir, 'mex.m')
+    with open(target, 'wb') as f:
+        f.write(rendered)
+
+if __name__ == "__main__":
+    # usage notes for the command line interface (a bare string expression)
+    """
+    Usage: python cvmex.py  --jinja2 /path/to/jinja2/engine
+                            --opts [-list -of -opts]
+                            --include_dirs [-list -of -opencv_include_directories]
+                            --lib_dir opencv_lib_directory
+                            --libs [-lopencv_core -lopencv_imgproc ...]
+                            --flags [-Wall -opencv_build_flags ...]
+                            --outdir /path/to/generated/output
+
+    cvmex.py generates a custom mex compiler that automatically links OpenCV
+    libraries to built sources where appropriate. The calling syntax is the
+    same as the builtin mex compiler, with added cv qualification:
+      >> cv.mex(..., ...);
+    """
+
+    # parse the input options
+    # (os and time imported here are the module globals substitute() reads)
+    import sys, re, os, time
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    # every option below is declared without nargs, so each takes one value
+    parser.add_argument('--jinja2')
+    parser.add_argument('--opts')
+    parser.add_argument('--include_dirs')
+    parser.add_argument('--lib_dir')
+    parser.add_argument('--libs')
+    parser.add_argument('--flags')
+    parser.add_argument('--outdir')
+    cv = parser.parse_args()
+
+    # add jinja to the path
+    sys.path.append(cv.jinja2)
+
+    # imported only after the jinja2 location has been appended to sys.path
+    from filters import *
+    from jinja2 import Environment, FileSystemLoader
+
+    # populate the mex base template
+    substitute(cv, cv.outdir)
--- a/modules/matlab/generator/filters.py
+++ b/modules/matlab/generator/filters.py
@@ -0,0 +1,180 @@
+from textwrap import TextWrapper
+from string import split, join
+import re, os
+# precompile a URL matching regular expression
+# NOTE(review): inside the character class, `\+-=` reads as a range from '+'
+# to '=' rather than three separate literals -- confirm this is intended
+urlexpr = re.compile(r"((https?):((//)|(\\\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)", re.MULTILINE|re.UNICODE)
+
+def inputs(args):
+    '''Keeps only the input arguments in a list of elements.
+
+    An argument counts as an input when its I flag is set. When args is the
+    dictionary produced by the 'only' filter, arguments that are also
+    flagged as outputs (O) are dropped, leaving the pure inputs.
+    '''
+    try:
+        return [arg for arg in args['only'] if arg.I and not arg.O]
+    except Exception:
+        # args was not wrapped by only(): treat it as a plain sequence.
+        # Exception (rather than a bare except) lets KeyboardInterrupt and
+        # SystemExit propagate.
+        return [arg for arg in args if arg.I]
+
+def ninputs(fun):
+    '''Number of input arguments of fun, required plus optional'''
+    required = inputs(fun.req)
+    optional = inputs(fun.opt)
+    return len(required) + len(optional)
+
+def outputs(args):
+    '''Keeps only the output arguments in a list of elements.
+
+    An argument counts as an output when its O flag is set. When args is the
+    dictionary produced by the 'only' filter, arguments that are also
+    flagged as inputs (I) are dropped, leaving the pure outputs.
+    '''
+    try:
+        return [arg for arg in args['only'] if arg.O and not arg.I]
+    except Exception:
+        # args was not wrapped by only(): treat it as a plain sequence.
+        # Exception (rather than a bare except) lets KeyboardInterrupt and
+        # SystemExit propagate.
+        return [arg for arg in args if arg.O]
+
+def only(args):
+    '''Wraps args in a dictionary under the key 'only'.
+    inputs() and outputs() look for that key and, when it is present,
+    return the arguments which are exclusively inputs or exclusively outputs'''
+    return {'only': args}
+
+def void(arg):
+    '''Is the input equal to the string 'void'? '''
+    return arg == 'void'
+
+def flip(arg):
+    '''logical negation of the input (returns not arg)'''
+    return not arg
+
+def noutputs(fun):
+    '''Number of values fun produces: one for a non-void return type plus
+    every output argument, required and optional'''
+    count = 0 if void(fun.rtp) else 1
+    count += len(outputs(fun.req))
+    count += len(outputs(fun.opt))
+    return count
+
+def convertibleToInt(string):
+    '''Can the input string be evaluated to an integer?
+
+    Returns True when the statement '1+<string>' executes without raising,
+    False otherwise.
+    '''
+    # NOTE(review): the check only proves that '1+<string>' runs, so values
+    # such as '2.5' are accepted as well -- confirm callers are fine with that
+    # NOTE(review): exec runs the text as Python code; only safe while the
+    # strings come from trusted sources -- confirm
+    salt = '1+'
+    try:
+        exec(salt+string)
+        return True
+    except:
+        return False
+
+def binaryToDecimal(string):
+    '''Attempt to evaluate the input string as a Python expression and return
+    the result converted back to a string. If evaluation raises, the input
+    is returned unchanged.
+    '''
+    # NOTE(review): eval runs the text as Python code; only safe while the
+    # strings come from trusted sources -- confirm
+    try:
+        return str(eval(string))
+    except:
+        return string
+
+def formatMatlabConstant(string, table):
+    '''
+    Given a string representing a Constant, and a table of all Constants,
+    attempt to resolve the Constant into a valid Matlab expression
+    For example, the input
+      DEPENDENT_VALUE = 1 << FIXED_VALUE
+    needs to be converted to
+      DEPENDENT_VALUE = bitshift(1, cv.FIXED_VALUE);
+
+    string -- the expression text to convert
+    table  -- container of known Constant names (tested with 'in')
+    Returns the converted expression as a string.
+    '''
+    # split the string into expressions, keeping the separators
+    # (raw strings keep the regex backslashes out of Python's escape handling)
+    words = re.split(r'(\W+)', string)
+    # add a 'cv' prefix if an expression is also a key in the lookup table
+    words = ''.join([('cv.'+word if word in table else word) for word in words])
+    # attempt to convert arithmetic expressions and binary/hex to decimal
+    words = binaryToDecimal(words)
+    # convert any remaining bitshifts to Matlab 'bitshift' methods
+    # (only an expression with exactly one '<<' is rewritten)
+    shift = re.sub(r'[\(\) ]', '', words).split('<<')
+    words = 'bitshift('+shift[0]+', '+shift[1]+')' if len(shift) == 2 else words
+    return words
+
+def matlabURL(string):
+    """Rewrites every URL matched by urlexpr into a Matlab hyperlink whose
+    target calls web(..., '-browser'), so that the link opens in the system
+    browser instead of the builtin Matlab browser"""
+    hyperlink = '<a href="matlab: web(\'\\1\', \'-browser\')">\\1</a>'
+    return urlexpr.sub(hyperlink, string)
+
+def capitalizeFirst(text):
+    '''Capitalize only the first character of the text string.
+    An empty string is returned unchanged.'''
+    # slicing instead of indexing so that '' does not raise IndexError
+    return text[:1].upper() + text[1:]
+
+def toUpperCamelCase(text):
+    '''variable_name --> VariableName
+    (each underscore-separated piece gets its first character capitalized)'''
+    pieces = text.split('_')
+    return ''.join(map(capitalizeFirst, pieces))
+
+def toLowerCamelCase(text):
+    '''variable_name --> variableName'''
+    # convert the function's own argument to UpperCamelCase first
+    upper_camel = toUpperCamelCase(text)
+    # then lower only the leading character; slicing keeps an empty result
+    # from raising IndexError
+    return upper_camel[:1].lower() + upper_camel[1:]
+
+def toUnderCase(text):
+    '''VariableName --> variable_name'''
+    # first pass: underscore before a capital that starts a lowercase run
+    partial = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', text)
+    # second pass: underscore between a lowercase letter or digit and a capital
+    separated = re.sub('([a-z0-9])([A-Z])', r'\1_\2', partial)
+    return separated.lower()
+
+def stripTags(text):
+    '''
+    strip or convert html tags from a text string
+    <code>content</code> --> CONTENT (tags removed, content upper-cased)
+    <anything>           --> ''
+    &lt                  --> <
+    &gt                  --> >
+    &le                  --> <=
+    &ge                  --> >=
+    '''
+    upper = lambda pattern: pattern.group(1).upper()
+    text = re.sub(r'<code>(.*?)</code>', upper, text)
+    # a '<' directly followed by '=' or whitespace does not open a tag here,
+    # so text such as 'a <= b' is not treated as markup
+    text = re.sub(r'<([^=\s].*?)>', '', text)
+    # entity replacements, applied in this fixed order
+    for entity, symbol in (('&lt', '<'), ('&gt', '>'), ('&le', '<='), ('&ge', '>=')):
+        text = re.sub(entity, symbol, text)
+    return text
+
+def qualify(text, name):
+    '''Adds uppercase 'CV.' qualification to any occurrences of name in text.
+
+    The upper-cased name is matched literally, so characters that are special
+    in regular expressions (such as '.') only match themselves.
+    '''
+    target = name.upper()
+    return text.replace(target, 'CV.'+target)
+
+def slugify(text):
+    '''A_Function_name --> a-function-name
+    (lower-cases the text and turns every underscore into a hyphen)'''
+    lowered = text.lower()
+    return '-'.join(lowered.split('_'))
+
+def filename(fullpath):
+    '''Returns the last path component with its extension removed
+    eg. /path/to/file.txt --> file
+    '''
+    leaf = os.path.basename(fullpath)
+    stem, _extension = os.path.splitext(leaf)
+    return stem
+
+def split(text, delimiter=' '):
+    '''Split a text string into a list using the specified delimiter
+    (a single space if not specified)'''
+    # NOTE: this definition rebinds the name 'split' imported from the string
+    # module at the top of the file
+    return text.split(delimiter)
+
+def csv(items, sep=', '):
+    '''Joins the items into one string, separated by sep
+    (comma and space if not specified)'''
+    return sep.join(items)
+
+def cellarray(items, escape='\''):
+    '''Formats the items as a matlab cell array literal: every item is
+    wrapped in the escape character (single quote if not specified) and the
+    comma separated result is enclosed in braces'''
+    quoted = [escape+item+escape for item in items]
+    return '{' + ', '.join(quoted) + '}'
+
+def stripExtraSpaces(text):
+    '''Collapses every run of whitespace into a single space and drops all
+    leading and trailing whitespace'''
+    tokens = text.split()
+    return ' '.join(tokens)
+
+def comment(text, wrap=80, escape='% ', escape_first='', escape_last=''):
+    '''comment filter
+    Takes a string in text, and wraps it to wrap characters in length with
+    preceding comment escape sequence on each line. escape_first and
+    escape_last can be used for languages which define block comments.
+    Examples:
+        C++ inline comment    comment(80, '// ')
+        C block comment:      comment(80, ' * ', '/*', ' */')
+        Matlab comment:       comment(80, '% ')
+        Matlab block comment: comment(80, '', '%{', '%}')
+        Python docstrings:    comment(80, '', '\'\'\'', '\'\'\'')
+    '''
+
+    # the escape sequence counts towards the line width
+    tw = TextWrapper(width=wrap-len(escape))
+    if escape_first:
+        escape_first = escape_first+'\n'
+    if escape_last:
+        escape_last = '\n'+escape_last
+    escapn = '\n'+escape
+    # wrap each input line on its own so existing line breaks are kept, then
+    # glue everything together with newline + escape. str.join is used so the
+    # function does not depend on the join() helper of the string module.
+    wrapped = (escapn.join(tw.wrap(line)) for line in text.split('\n'))
+    return escape_first+escape+escapn.join(wrapped)+escape_last
--- a/modules/matlab/generator/gen_matlab.py
+++ b/modules/matlab/generator/gen_matlab.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python
+
+class MatlabWrapperGenerator(object):
+    """
+    MatlabWrapperGenerator is a class for generating Matlab mex sources from
+    a set of C++ headers. MatlabWrapperGenerator objects can be default
+    constructed. Given an instance, the gen() method performs the translation.
+
+    Note: gen() looks up os, time, Template, CppHeaderParser, rst_parser,
+    ParseTree, todict, constants, the jinja2 Environment/FileSystemLoader and
+    the custom filters as module-level names. They are imported by the
+    script section at the bottom of this file, so they must already be
+    present in the module namespace when gen() is called.
+    """
+
+    def gen(self, module_root, modules, extras, output_dir):
+        """
+        Generate a set of Matlab mex source files by parsing exported symbols
+        in a set of C++ headers. The headers can be input in one (or both) of
+        two methods:
+        1. specify module_root and modules
+           Given a path to the OpenCV module root and a list of module names,
+           the headers to parse are implicitly constructed.
+        2. specify header locations explicitly in extras
+           Each element in the list of extras must be of the form:
+           'namespace=/full/path/to/extra/header.hpp' where 'namespace' is
+           the namespace in which the definitions should be added.
+        The output_dir specifies the directory to write the generated sources
+        to. It is created (together with the src, src/private, +cv and map
+        subdirectories) if it does not exist yet.
+        """
+        # parse each of the files and store in a dictionary
+        # as a separate "namespace"
+        parser = CppHeaderParser()
+        rst    = rst_parser.RstParser(parser)
+        rst_parser.verbose = False
+        rst_parser.show_warnings = False
+        rst_parser.show_errors = False
+        rst_parser.show_critical_errors = False
+
+        ns  = dict((key, []) for key in modules)
+        doc = dict((key, []) for key in modules)
+        path_template = Template('${module}/include/opencv2/${module}.hpp')
+
+        for module in modules:
+            # construct a header path from the module root and a path template
+            header = os.path.join(module_root, path_template.substitute(module=module))
+            # parse the definitions
+            ns[module] = parser.parse(header)
+            # parse the documentation
+            rst.parse(module, os.path.join(module_root, module))
+            doc[module] = rst.definitions
+            rst.definitions = {}
+
+        for extra in extras:
+            # split on the first '=' only, so that a header path which itself
+            # contains '=' is kept intact
+            module, header = extra.split("=", 1)
+            definitions = parser.parse(header)
+            # extend an existing namespace, or start a new one
+            ns[module] = ns[module] + definitions if module in ns else definitions
+
+        # cleanify the parser output
+        parse_tree = ParseTree()
+        parse_tree.build(ns)
+
+        # setup the template engine
+        template_dir = os.path.join(os.path.dirname(__file__), 'templates')
+        jtemplate    = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True, lstrip_blocks=True)
+
+        # add the custom filters
+        jtemplate.filters['formatMatlabConstant'] = formatMatlabConstant
+        jtemplate.filters['convertibleToInt'] = convertibleToInt
+        jtemplate.filters['toUpperCamelCase'] = toUpperCamelCase
+        jtemplate.filters['toLowerCamelCase'] = toLowerCamelCase
+        jtemplate.filters['toUnderCase'] = toUnderCase
+        jtemplate.filters['matlabURL'] = matlabURL
+        jtemplate.filters['stripTags'] = stripTags
+        jtemplate.filters['filename'] = filename
+        jtemplate.filters['comment']  = comment
+        jtemplate.filters['inputs']   = inputs
+        jtemplate.filters['ninputs'] = ninputs
+        jtemplate.filters['outputs']  = outputs
+        jtemplate.filters['noutputs'] = noutputs
+        jtemplate.filters['qualify'] = qualify
+        jtemplate.filters['slugify'] = slugify
+        jtemplate.filters['only'] = only
+        jtemplate.filters['void'] = void
+        jtemplate.filters['not'] = flip
+
+        # load the templates
+        tfunction  = jtemplate.get_template('template_function_base.cpp')
+        tclassm    = jtemplate.get_template('template_class_base.m')
+        tclassc    = jtemplate.get_template('template_class_base.cpp')
+        tdoc       = jtemplate.get_template('template_doc_base.m')
+        tconst     = jtemplate.get_template('template_map_base.m')
+
+        # create the build directory. makedirs is used instead of mkdir so
+        # that a missing output_dir (or any missing parent) is created too
+        output_source_dir  = output_dir+'/src'
+        output_private_dir = output_source_dir+'/private'
+        output_class_dir   = output_dir+'/+cv'
+        output_map_dir     = output_dir+'/map'
+        for directory in (output_source_dir, output_private_dir,
+                          output_class_dir, output_map_dir):
+            if not os.path.isdir(directory):
+                os.makedirs(directory)
+
+        # populate templates
+        for namespace in parse_tree.namespaces:
+            # functions
+            for method in namespace.methods:
+                populated = tfunction.render(fun=method, time=time, includes=namespace.name)
+                with open(output_source_dir+'/'+method.name+'.cpp', 'wb') as f:
+                    f.write(populated)
+                # a Matlab help file is only written for documented functions
+                if namespace.name in doc and method.name in doc[namespace.name]:
+                    populated = tdoc.render(fun=method, doc=doc[namespace.name][method.name], time=time)
+                    with open(output_class_dir+'/'+method.name+'.m', 'wb') as f:
+                        f.write(populated)
+            # classes
+            for clss in namespace.classes:
+                # cpp converter
+                populated = tclassc.render(clss=clss, time=time)
+                with open(output_private_dir+'/'+clss.name+'Bridge.cpp', 'wb') as f:
+                    f.write(populated)
+                # matlab classdef
+                populated = tclassm.render(clss=clss, time=time)
+                with open(output_class_dir+'/'+clss.name+'.m', 'wb') as f:
+                    f.write(populated)
+
+        # create a global constants lookup table
+        const = dict(constants(todict(parse_tree.namespaces)))
+        populated = tconst.render(constants=const, time=time)
+        with open(output_dir+'/cv.m', 'wb') as f:
+            f.write(populated)
+
+
+if __name__ == "__main__":
+    """
+    Usage: python gen_matlab.py --jinja2 /path/to/jinja2/engine
+                                --hdrparser /path/to/hdr_parser/dir
+                                --rstparser /path/to/rst_parser/dir
+                                --moduleroot /path/to/opencv/modules
+                                --modules [core imgproc objdetect etc]
+                                --extra namespace=/path/to/extra/header.hpp
+                                --outdir /path/to/output/generated/srcs
+
+    gen_matlab.py is the main control script for generating matlab source
+    files from given set of headers. Internally, gen_matlab:
+      1. constructs the headers to parse from the module root and list of modules
+      2. parses the headers using CppHeaderParser
+      3. refactors the definitions using ParseTree
+      4. parses .rst docs using RstParser
+      5. populates the templates for classes, function, enums and docs from the
+         definitions
+
+    gen_matlab.py requires the following inputs:
+    --jinja2       the path to the Jinja2 templating engine
+                   e.g. ${CMAKE_SOURCE_DIR}/3rdparty
+    --hdrparser    the path to the header parser directory
+                   (opencv/modules/python/src2)
+    --rstparser    the path to the rst parser directory
+                   (opencv/modules/java/generator)
+    --moduleroot   (optional) path to the opencv directory containing the modules
+    --modules      (optional - required if --moduleroot specified) the modules
+                   to produce bindings for. The path to the include directories
+                   as well as the namespaces are constructed from the modules
+                   and the moduleroot
+    --extra        extra headers explicitly defined to parse. This must be in
+                   the format "namespace=/path/to/extra/header.hpp". For example,
+                   the core module requires the extra header:
+                   "core=/opencv/modules/core/include/opencv2/core/core/base.hpp"
+    --outdir       the output directory to put the generated matlab sources. In
+                   the OpenCV build this is "${CMAKE_CURRENT_BUILD_DIR}/src"
+    """
+
+    # parse the input options
+    # os and time are also used as module-level names by MatlabWrapperGenerator
+    import sys, re, os, time
+    from argparse import ArgumentParser
+    parser = ArgumentParser()
+    parser.add_argument('--jinja2')
+    parser.add_argument('--hdrparser')
+    parser.add_argument('--rstparser')
+    parser.add_argument('--moduleroot', default='', required=False)
+    parser.add_argument('--modules', nargs='*', default=[], required=False)
+    parser.add_argument('--extra', nargs='*', default=[], required=False)
+    # the generator cannot run without an output directory, so report a
+    # missing --outdir up front instead of failing after the headers are parsed
+    parser.add_argument('--outdir', required=True)
+    args = parser.parse_args()
+
+    # add the jinja2, hdr_parser and rst_parser locations to the path.
+    # Options that were not given are skipped rather than appended as None
+    for location in (args.jinja2, args.hdrparser, args.rstparser):
+        if location:
+            sys.path.append(location)
+
+    from string import Template
+    from hdr_parser import CppHeaderParser
+    import rst_parser
+    from parse_tree import ParseTree, todict, constants
+    from filters import *
+    from jinja2 import Environment, FileSystemLoader
+
+    # create the generator
+    mwg = MatlabWrapperGenerator()
+    mwg.gen(args.moduleroot, args.modules, args.extra, args.outdir)
--- a/modules/matlab/generator/parse_tree.py
+++ b/modules/matlab/generator/parse_tree.py
@@ -0,0 +1,356 @@
+from string import join
+from textwrap import fill
+from filters import *
+
+class ParseTree(object):
+    """
+    The ParseTree class produces a semantic tree of C++ definitions given
+    the output of the CppHeaderParser (from opencv/modules/python/src2/hdr_parser.py)
+
+    The full hierarchy is as follows:
+
+      Namespaces
+        |
+        |- name
+        |- Classes
+            |
+            |- name
+            |- Methods
+            |- Constants
+        |- Methods
+            |
+            |- name
+            |- static (T/F)
+            |- return type
+            |- required Arguments
+                |
+                |- name
+                |- const (T/F)
+                |- reference ('&'/'*')
+                |- type
+                |- input
+                |- output (pass return by reference)
+                |- default value
+            |- optional Arguments
+        |- Constants
+            |
+            |- name
+            |- const (T/F)
+            |- reference ('&'/'*')
+            |- type
+            |- value
+
+    The semantic tree contains substantial information for easily introspecting
+    information about objects. How many methods does the 'core' namespace have?
+    Does the 'randn' method have any return by reference (output) arguments?
+    How many required and optional arguments does the 'add' method have? Is the
+    variable passed by reference or raw pointer?
+
+    Individual definitions from the parse tree (Classes, Functions, Constants)
+    are passed to the Jinja2 template engine where they are manipulated to
+    produce Matlab mex sources.
+
+    A common call tree for constructing and using a ParseTree object is:
+
+      # parse a set of definitions into a dictionary of namespaces
+      parser = CppHeaderParser()
+      ns['core'] = parser.parse('path/to/opencv/core.hpp')
+
+      # refactor into a semantic tree
+      parse_tree = ParseTree()
+      parse_tree.build(ns)
+
+      # iterate over the tree
+      for namespace in parse_tree.namespaces:
+        for clss in namespace.classes:
+          # do stuff
+        for method in namespace.methods:
+          # do stuff
+
+    Calling 'print' on a ParseTree object will reconstruct the definitions
+    to produce an output resembling the original C++ code.
+    """
+    def __init__(self, namespaces=None):
+        self.namespaces = namespaces if namespaces else []
+
+    def __str__(self):
+        return '\n\n\n'.join(ns.__str__() for ns in self.namespaces)
+
+    def build(self, namespaces):
+        """
+        Translate a dictionary of { namespace name: list of hdr_parser
+        definitions } and append one Namespace object per entry to
+        self.namespaces. Definitions the Translator returns None for are
+        skipped. Raises TypeError for a translated object that is not a
+        Class, Method or Constant.
+        """
+        babel = Translator()
+        for name, definitions in namespaces.items():
+            class_tree = {}
+            methods = []
+            constants = []
+            for defn in definitions:
+                obj = babel.translate(defn)
+                if obj is None:
+                    continue
+                # classes, and anything that belongs to a class, go into the class tree
+                if type(obj) is Class or obj.clss:
+                    self.insertIntoClassTree(obj, class_tree)
+                elif type(obj) is Method:
+                    methods.append(obj)
+                elif type(obj) is Constant:
+                    constants.append(obj)
+                else:
+                    raise TypeError('Unexpected object type: '+str(type(obj)))
+            self.namespaces.append(Namespace(name, constants, class_tree.values(), methods))
+
+    def insertIntoClassTree(self, obj, class_tree):
+        """
+        Insert a Class, Method or Constant into class_tree, a dictionary of
+        { class name: Class }. The owning class is created on first use.
+        Objects without a class name are ignored. Raises TypeError for any
+        other object type.
+        """
+        cname = obj.name if type(obj) is Class else obj.clss
+        if not cname:
+            return
+        if not cname in class_tree:
+            # add a new class to the tree
+            class_tree[cname] = Class(cname)
+        if type(obj) is Class:
+            # a class declaration only registers the class, it carries no
+            # member to insert (previously this fell through to the TypeError)
+            return
+        # insert the definition into the class
+        val = class_tree[cname]
+        if type(obj) is Method:
+            val.methods.append(obj)
+        elif type(obj) is Constant:
+            val.constants.append(obj)
+        else:
+            raise TypeError('Unexpected object type: '+str(type(obj)))
+
+
+
+class Translator(object):
+    """
+    The Translator class converts a single definition from the nested list
+    representation of the hdr_parser into a Class, Method or Constant
+    object, ready to be inserted into the ParseTree.
+    The entry point is translate(). It dispatches to the helper methods
+    translateClass(), translateMethod() and translateConstant(), which in
+    turn rely on translateArgument(), translateName() and
+    translateClassName()
+    """
+    def translate(self, defn):
+        # --- class ---
+        # classes have 'class' (or 'struct') prefixed on their name
+        qualifiers = defn[0].split(' ')
+        if 'class' in qualifiers or 'struct' in qualifiers:
+            return self.translateClass(defn)
+        # --- operators! ---
+        #TODO: implement operators: http://www.mathworks.com.au/help/matlab/matlab_oop/implementing-operators-for-your-class.html
+        if 'operator' in defn[0]:
+            return None
+        # --- constant ---
+        if convertibleToInt(defn[1]):
+            return self.translateConstant(defn)
+        # --- function ---
+        # functions either need to have input arguments, or not uppercase names
+        if defn[3] or not self.translateName(defn[0]).split('_')[0].isupper():
+            return self.translateMethod(defn)
+        # --- constant ---
+        return self.translateConstant(defn)
+
+    def translateClass(self, defn):
+        # the definition itself is not inspected: an empty Class is returned
+        return Class()
+
+    def translateMethod(self, defn, class_tree=None):
+        name   = self.translateName(defn[0])
+        clss   = self.translateClassName(defn[0])
+        static = 'S' in ''.join(defn[2])
+        required = []
+        optional = []
+        for raw in defn[3]:
+            if not raw:
+                continue
+            argument = self.translateArgument(raw)
+            # an argument with a default value is optional
+            if argument.default:
+                optional.append(argument)
+            else:
+                required.append(argument)
+        return Method(name, clss, static, '', defn[1], False, required, optional)
+
+    def translateConstant(self, defn):
+        is_const = 'const' in defn[0]
+        name = self.translateName(defn[0])
+        clss = self.translateClassName(defn[0])
+        # every constant is typed as int, with defn[1] as its value
+        return Constant(name, clss, 'int', is_const, '', defn[1])
+
+    def translateArgument(self, defn):
+        decl = defn[0]
+        # '&' takes precedence over '*' when both occur in the declaration
+        if '&' in decl:
+            ref = '&'
+        elif '*' in decl:
+            ref = '*'
+        else:
+            ref = ''
+        const = ' const ' in ' '+decl+' '
+        # the type is the declaration without the reference marker and 'const'
+        words = decl.replace(ref, '').split()
+        tp = " ".join([word for word in words if word != 'const'])
+        name = defn[1]
+        default = defn[2] or ''
+        modifiers = ''.join(defn[3])
+        # without any modifier an argument is treated as an input
+        I = not modifiers or 'I' in modifiers
+        O = 'O' in modifiers
+        return Argument(name, tp, const, I, O, ref, default)
+
+    def translateName(self, name):
+        # last dot-separated component of the last space-separated word
+        qualified = name.split(' ')[-1]
+        return qualified.split('.')[-1]
+
+    def translateClassName(self, name):
+        # second to last component of the qualified name, unless it is 'cv'
+        parts = name.split(' ')[-1].split('.')
+        if len(parts) > 1 and parts[-2] != 'cv':
+            return parts[-2]
+        return ''
+
+
+
+class Namespace(object):
+    """
+    Namespace
+      |
+      |- name
+      |- Constants
+      |- Classes
+      |- Methods
+    """
+    def __init__(self, name='', constants=None, classes=None, methods=None):
+        self.name = name
+        # empty (or missing) collections are replaced by fresh lists
+        self.constants = constants or []
+        self.classes   = classes or []
+        self.methods   = methods or []
+
+    def __str__(self):
+        # constants first, then free methods, then classes
+        pieces = ['namespace '+self.name+' {\n\n']
+        if self.constants:
+            pieces.append('\n'.join(c.__str__() for c in self.constants)+'\n\n')
+        if self.methods:
+            pieces.append('\n'.join(f.__str__() for f in self.methods)+'\n\n')
+        if self.classes:
+            pieces.append('\n\n'.join(o.__str__() for o in self.classes))
+        pieces.append('\n};')
+        return ''.join(pieces)
+
+class Class(object):
+    """
+    Class
+      |
+      |- name
+      |- namespace
+      |- Constants
+      |- Methods
+    """
+    def __init__(self, name='', namespace='', constants=None, methods=None):
+        self.name = name
+        self.namespace = namespace
+        # empty (or missing) collections are replaced by fresh lists
+        self.constants = constants or []
+        self.methods   = methods or []
+
+    def __str__(self):
+        # constants first, then methods, one tab-indented member per line
+        pieces = ['class '+self.name+' {\n\t']
+        if self.constants:
+            pieces.append('\n\t'.join(c.__str__() for c in self.constants)+'\n\n\t')
+        if self.methods:
+            pieces.append('\n\t'.join(f.__str__() for f in self.methods))
+        pieces.append('\n};')
+        return ''.join(pieces)
+
+class Method(object):
+    """
+    Method
+    int VideoWriter::read( cv::Mat& frame, const cv::Mat& mask=cv::Mat() );
+    ---    -----     ----     --------           ----------------
+    rtp    class     name     required               optional
+
+    name        the method name
+    clss        the class the method belongs to ('' if free)
+    constructor True when the method name equals the class name
+    static      static?
+    namespace   the namespace the method belongs to ('' if free)
+    rtp         the return type
+    const       const?
+    req         list of required arguments
+    opt         list of optional arguments
+    """
+    def __init__(self, name='', clss='', static=False, namespace='', rtp='', const=False, req=None, opt=None):
+        self.name  = name
+        self.clss  = clss
+        # a method that carries the name of its class is a constructor
+        self.constructor = bool(name == clss)
+        self.static = static
+        self.const = const
+        self.namespace = namespace
+        self.rtp = rtp
+        self.req = req or []
+        self.opt = opt or []
+
+    def __str__(self):
+        return_type = self.rtp+' ' if self.rtp else ''
+        qualifier   = ' const' if self.const else ''
+        # required arguments are listed before the optional ones
+        arguments   = ', '.join(arg.__str__() for arg in self.req+self.opt)
+        return return_type+self.name+'('+arguments+')'+qualifier+';'
+
+class Argument(object):
+    """
+    Argument
+    const cv::Mat&  mask=cv::Mat()
+    -----  ---- --- ----  -------
+    const   tp  ref name  default
+
+    name    the name of the argument
+    tp      the type of the argument
+    const   whether the argument is const
+    I       whether the argument is treated as an input
+    O       whether the argument is treated as an output (return by reference)
+    ref     how the argument is passed ('*', '&', or '' for by value)
+    default the default value of the argument ('' if required)
+    """
+    def __init__(self, name='', tp='', const=False, I=True, O=False, ref='', default=''):
+        self.name = name
+        self.tp = tp
+        self.const = const
+        self.I = I
+        self.O = O
+        self.ref = ref
+        self.default = default
+
+    def __str__(self):
+        # reassemble the declaration: [const ]type[ref] name[=default]
+        text = self.tp+self.ref+' '+self.name
+        if self.const:
+            text = 'const '+text
+        if self.default:
+            text = text+'='+self.default
+        return text
+
+class Constant(object):
+    """
+    Constant
+    DFT_COMPLEX_OUTPUT = 12;
+         ----          -------
+         name          default
+
+    name    the name of the constant
+    clss    the class the constant belongs to ('' if free)
+    tp      the type of the constant
+    const   whether the constant is declared const
+    ref     whether the constant is a reference ('*'/'&')
+    default the value of the constant, as a string
+    """
+    def __init__(self, name='', clss='', tp='', const=False, ref='', default=''):
+        self.name = name
+        self.clss = clss
+        self.tp = tp
+        self.const = const
+        self.ref = ref
+        self.default = default
+
+    def __str__(self):
+        # reassemble the declaration: [const ]type[ref] name[=default];
+        text = self.tp+self.ref+' '+self.name
+        if self.const:
+            text = 'const '+text
+        if self.default:
+            text = text+'='+self.default
+        return text+';'
+
+def constants(tree):
+    """
+    Recursive generator that walks a ParseTree which has been converted to
+    nested dictionaries and lists, and yields a (name, default) tuple for
+    every entry of every 'constants' list it finds. Passing the generator
+    to dict() produces a flat { name: value } lookup table.
+    """
+    if isinstance(tree, dict):
+        own = tree.get('constants')
+        if isinstance(own, list):
+            for node in own:
+                yield (node['name'], node['default'])
+        children = tree.values()
+    elif isinstance(tree, list):
+        children = tree
+    else:
+        # leaf values hold no constants
+        return
+    for child in children:
+        for pair in constants(child):
+            yield pair
+
+def todict(obj, classkey=None):
+    """
+    Convert the ParseTree to nested dictionaries and lists.
+    Dictionaries are converted in place, iterables become lists, and any
+    other object with a __dict__ becomes a dictionary of its attributes,
+    leaving out callables and names starting with an underscore. If
+    classkey is given, the class name of each converted object is stored
+    under that key. Everything else is returned unchanged.
+    """
+    if isinstance(obj, dict):
+        for key in obj.keys():
+            obj[key] = todict(obj[key], classkey)
+        return obj
+    if hasattr(obj, "__iter__"):
+        return [todict(item, classkey) for item in obj]
+    if not hasattr(obj, "__dict__"):
+        return obj
+    data = {}
+    for key, value in obj.__dict__.iteritems():
+        if callable(value) or key.startswith('_'):
+            continue
+        data[key] = todict(value, classkey)
+    if classkey is not None and hasattr(obj, "__class__"):
+        data[classkey] = obj.__class__.__name__
+    return data
--- a/modules/matlab/generator/templates/init.py
+++ b/modules/matlab/generator/templates/init.py
--- a/modules/matlab/generator/templates/functional.cpp
+++ b/modules/matlab/generator/templates/functional.cpp
@@ -0,0 +1,149 @@
+/*
+ * compose
+ * compose a function call
+ * This macro takes as input a Method object and composes
+ * a function call by inspecting the types and argument names
+ */
+{% macro compose(fun) %}
+  {# Return value: assign to retval, or construct obj when fun is a constructor #}
+  {%- if not fun.rtp|void and not fun.constructor -%} retval = {% endif -%}
+  {%- if fun.constructor -%}{{fun.clss}} obj = {% endif -%}
+  {%- if fun.clss and not fun.constructor -%}inst.{%- else -%} cv:: {%- endif -%}
+  {{fun.name}}(
+  {#- Required arguments: pointer arguments are passed by address -#}
+  {%- for arg in fun.req -%}
+    {%- if arg.ref == '*' -%}&{%- endif -%}
+    {{arg.name}}
+    {%- if not loop.last %}, {% endif %}
+  {% endfor %}
+  {#- Optional arguments: follow the required ones, separated by a comma -#}
+  {% if fun.req and fun.opt %}, {% endif %}
+  {%- for opt in fun.opt -%}
+    {%- if opt.ref == '*' -%}&{%- endif -%}
+    {{opt.name}}
+    {%- if not loop.last -%}, {% endif %}
+  {%- endfor -%}
+  );
+{%- endmacro %}
+
+
+/*
+ * composeMatlab
+ * compose a Matlab function call
+ * This macro takes as input a Method object and composes
+ * a Matlab function call by inspecting the types and argument names
+ */
+{% macro composeMatlab(fun) %}
+  {# Return value: LVALUE placeholder, bracketed when there is more than one output #}
+  {%- if fun|noutputs > 1 -%}[{% endif -%}
+  {%- if not fun.rtp|void -%}LVALUE{% endif -%}
+  {%- if not fun.rtp|void and fun|noutputs > 1 -%},{% endif -%}
+  {# Outputs: an argument that is also an input gets an _out suffix -#}
+  {%- for arg in fun.req|outputs + fun.opt|outputs -%}
+    {{arg.name}}
+    {%- if arg.I -%}_out{%- endif -%}
+    {%- if not loop.last %}, {% endif %}
+  {% endfor %}
+  {%- if fun|noutputs > 1 -%}]{% endif -%}
+  {%- if fun|noutputs %} = {% endif -%}
+  cv.{{fun.name}}(
+  {#- Inputs: an argument that is also an output gets an _in suffix -#}
+  {%- for arg in fun.req|inputs + fun.opt|inputs -%}
+    {{arg.name}}
+    {%- if arg.O -%}_in{%- endif -%}
+    {%- if not loop.last %}, {% endif -%}
+  {% endfor -%}
+  );
+{%- endmacro %}
+
+
+/*
+ * composeVariant
+ * compose a variant call for the ArgumentParser
+ */
+{% macro composeVariant(fun) %}
+{#- Emits addVariant("name", number of required inputs, number of optional inputs, followed by the quoted name of every optional input) -#}
+addVariant("{{ fun.name }}", {{ fun.req|inputs|length }}, {{ fun.opt|inputs|length }}
+{%- if fun.opt|inputs|length %}, {% endif -%}
+{%- for arg in fun.opt|inputs -%}
+  "{{arg.name}}"
+  {%- if not loop.last %}, {% endif -%}
+{% endfor -%}
+)
+{%- endmacro %}
+
+
+/*
+ * composeWithExceptionHandler
+ * compose a function call wrapped in exception traps
+ * This macro takes as input a Method object and composes a function
+ * call through the compose() macro, then wraps the return in traps
+ * for cv::Exceptions, std::exceptions, and all generic exceptions
+ * and returns a useful error message to the Matlab interpreter
+ */
+{%- macro composeWithExceptionHandler(fun) -%}
+  {#- Wraps the call produced by compose(fun) in a try block; every catch clause reports through error() -#}
+  // call the opencv function
+  // [out =] namespace.fun(src1, ..., srcn, dst1, ..., dstn, opt1, ..., optn);
+  try {
+    {{ compose(fun) }}
+  } catch(cv::Exception& e) {
+    error(std::string("cv::exception caught: ").append(e.what()).c_str());
+  } catch(std::exception& e) {
+    error(std::string("std::exception caught: ").append(e.what()).c_str());
+  } catch(...) {
+    error("Uncaught exception occurred in {{fun.name}}");
+  }
+{%- endmacro %}
+
+
+/*
+ * handleInputs
+ * unpack input arguments from the Bridge
+ * Given an input Bridge object, this unpacks the object from the Bridge and
+ * casts them into the correct type
+ */
+{%- macro handleInputs(fun) %}
+
+  {% if fun|ninputs or (fun|noutputs and not fun.constructor) %}
+  // unpack the arguments
+  {# Inputs: required ones come first in the inputs vector, optional ones follow and fall back to their default when the slot is empty #}
+  {% for arg in fun.req|inputs %}
+  {{arg.tp}} {{arg.name}} = inputs[{{ loop.index0 }}].to{{arg.tp|toUpperCamelCase}}();
+  {% endfor %}
+  {% for opt in fun.opt|inputs %}
+  {{opt.tp}} {{opt.name}} = inputs[{{loop.index0 + fun.req|inputs|length}}].empty() ? {% if opt.ref == '*' -%} {{opt.tp}}() {%- else -%} {{opt.default}} {%- endif %} : inputs[{{loop.index0 + fun.req|inputs|length}}].to{{opt.tp|toUpperCamelCase}}();
+  {% endfor %}
+  {# Outputs: declare a local for every argument selected by the only|outputs filters, plus retval for a non-void return #}
+  {% for arg in fun.req|only|outputs %}
+  {{arg.tp}} {{arg.name}};
+  {% endfor %}
+  {% for opt in fun.opt|only|outputs %}
+  {{opt.tp}} {{opt.name}};
+  {% endfor %}
+  {% if not fun.rtp|void and not fun.constructor %}
+  {{fun.rtp}} retval;
+  {% endif %}
+  {% endif %}
+
+{%- endmacro %}
+
+/*
+ * handleOutputs
+ * pack outputs into the bridge
+ * Given a set of outputs, this method assigns them into the bridge for
+ * return to the calling method
+ */
+{%- macro handleOutputs(fun) %}
+
+  {% if fun|noutputs %}
+  // assign the outputs into the bridge
+  {% if not fun.rtp|void and not fun.constructor %}
+  outputs[0] = retval;
+  {% endif %}
+  {# the output index is offset by fun.rtp|void|not (presumably 1 for a non-void return type), and by the number of required outputs for the optional ones #}
+  {% for arg in fun.req|outputs %}
+  outputs[{{loop.index0 + fun.rtp|void|not}}] = {{arg.name}};
+  {% endfor %}
+  {% for opt in fun.opt|outputs %}
+  outputs[{{loop.index0 + fun.rtp|void|not + fun.req|outputs|length}}] = {{opt.name}};
+  {% endfor %}
+  {% endif %}
+{%- endmacro %}
--- a/modules/matlab/generator/templates/template_build_info.m
+++ b/modules/matlab/generator/templates/template_build_info.m
@@ -0,0 +1,41 @@
+function buildInformation()
+%CV.BUILDINFORMATION display OpenCV Toolbox build information
+%
+%   Call CV.BUILDINFORMATION() to get a printout of diagnostic information
+%   pertaining to your particular build of the OpenCV Toolbox. If you ever
+%   run into issues with the Toolbox, it is useful to submit this
+%   information alongside a bug report to the OpenCV team.
+%
+%   Copyright {{ time.strftime("%Y", time.localtime()) }} The OpenCV Foundation
+%
+info = {
+'  ------------------------------------------------------------------------'
+'                              <strong>OpenCV Toolbox</strong>'
+'                     Build and diagnostic information'
+'  ------------------------------------------------------------------------'
+''
+'  <strong>Platform</strong>'
+'  OS:            {{ build.os }}'
+'  Architecture:  {{ build.arch[0] }}-bit {{ build.arch[1] }}'
+'  Compiler:      {{ build.compiler | csv(' ') }}'
+''
+'  <strong>Matlab</strong>'
+['  Version:       ' version()]
+['  Mex extension: ' mexext()]
+'  Architecture:  {{ build.mex_arch }}'
+'  Mex path:      {{ build.mex_script }}'
+'  Mex flags:     {{ build.mex_opts | csv(' ') }}'
+'  CXX flags:     {{ build.cxx_flags | csv(' ') | stripExtraSpaces | wordwrap(60, True, '\'\n\'                 ') }}'
+''
+'  <strong>OpenCV</strong>'
+'  Version:       {{ build.opencv_version }}'
+'  Commit:        {{ build.commit }}'
+'  Configuration: {{ build.configuration }}'
+'  Modules:       {{ build.modules | csv | wordwrap(60, True, '\'\n\'                 ') }}'
+''
+};
+
+% Terminate every row with a real newline character and print the text
+% verbatim through an explicit '%s' format. Using the text itself as the
+% format would make fprintf interpret backslashes and percent signs that
+% occur in paths or compiler flags.
+info = cellfun(@(x) [x sprintf('\n')], info, 'UniformOutput', false);
+info = horzcat(info{:});
+fprintf('%s', info);
+end
--- a/modules/matlab/generator/templates/template_class_base.cpp
+++ b/modules/matlab/generator/templates/template_class_base.cpp
@@ -0,0 +1,98 @@
+{% import 'functional.cpp' as functional %}
+/*
+ * file:   {{clss.name}}Bridge.cpp
+ * author: A trusty code generator
+ * date:   {{time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())}}
+ *
+ * This file was autogenerated, do not modify.
+ * See LICENSE for full modification and redistribution details.
+ * Copyright {{time.strftime("%Y", time.localtime())}} The OpenCV Foundation
+ */
+#include <mex.h>
+#include <vector>
+#include <string>
+#include <opencv2/matlab/map.hpp>
+#include <opencv2/matlab/bridge.hpp>
+#include <opencv2/core.hpp>
+using namespace cv;
+using namespace matlab;
+using namespace bridge;
+
+namespace {
+
+// pointer to a generated method wrapper: takes the instance and the packed
+// input arguments, and returns the packed outputs
+typedef std::vector<Bridge> (*MethodSignature)({{clss.name}}&, const std::vector<Bridge>&);
+
+{% for function in clss.methods %}
+
+{% if function.constructor %}
+// wrapper for {{function.name}}() constructor
+{{ function.clss }} {{function.name}}(const std::vector<Bridge>& inputs) {
+  {{ functional.handleInputs(function) }}
+  {{ functional.compose(function) }}
+  return obj;
+}
+{% else %}
+// wrapper for {{function.name}}() method
+std::vector<Bridge> {{function.name}}({{clss.name}}& inst, const std::vector<Bridge>& inputs) {
+  std::vector<Bridge> outputs{% if function|noutputs %}({{function|noutputs}}){% endif %};
+  {{ functional.handleInputs(function) }}
+  {{ functional.composeWithExceptionHandler(function) }}
+  {{ functional.handleOutputs(function) }}
+  return outputs;
+}
+{% endif %}
+{% endfor %}
+
+// build the lookup table from method name to method wrapper.
+// Constructor wrappers are left out: they take no instance argument, so
+// their signature does not match MethodSignature
+Map<std::string, MethodSignature> createMethodMap() {
+  Map<std::string, MethodSignature> m;
+  {% for function in clss.methods if not function.constructor %}
+  m["{{function.name}}"] = &{{function.name}};
+  {% endfor %}
+
+  return m;
+}
+static const Map<std::string, MethodSignature> methods = createMethodMap();
+
+// map of created {{clss.name}} instances. Don't trust the user to keep them safe...
+static Map<void *, {{clss.name}}> instances;
+
+} // end namespace
+
+/*
+ * {{ clss.name }}
+ * Gateway routine
+ *   nlhs - number of return arguments
+ *   plhs - pointers to return arguments
+ *   nrhs - number of input arguments
+ *   prhs - pointers to input arguments
+ *
+ * The gateway is defined outside of the anonymous namespace: it must have
+ * external linkage for Matlab to be able to locate it in the mex file.
+ */
+void mexFunction(int nlhs, mxArray* plhs[],
+                 int nrhs, const mxArray* prhs[]) {
+
+  // the method name and the instance handle are always required
+  if (nrhs < 2) {
+    mexErrMsgTxt("Expected a method name and an object instance");
+  }
+
+  // parse the inputs
+  Bridge method_name(prhs[0]);
+
+  Bridge handle(prhs[1]);
+  std::vector<Bridge> brhs(prhs+2, prhs+nrhs);
+
+  // retrieve the instance of interest
+  // (declared outside of the try block so that it is still visible below)
+  {{clss.name}}* inst = NULL;
+  try {
+    inst = &instances.at(handle.address());
+  } catch (const std::out_of_range& e) {
+    mexErrMsgTxt("Invalid object instance provided");
+  }
+
+  // invoke the correct method on the data
+  // (blhs is declared outside of the try block so that the postfun and
+  // cleanup blocks can access the outputs)
+  std::vector<Bridge> blhs;
+  try {
+    blhs = (*methods.at(method_name))(*inst, brhs);
+  } catch (const std::out_of_range& e) {
+    mexErrMsgTxt("Unknown method specified");
+  }
+
+  {% block postfun %}
+  {% endblock %}
+
+  {% block cleanup %}
+  {% endblock %}
+
+}
--- a/modules/matlab/generator/templates/template_class_base.m
+++ b/modules/matlab/generator/templates/template_class_base.m
@@ -0,0 +1,31 @@
+% {{clss.name | upper}}
+% Matlab handle class for OpenCV object classes
+%
+% This file was autogenerated, do not modify.
+% See LICENSE for full modification and redistribution details.
+% Copyright {{time.strftime("%Y", time.localtime())}} The OpenCV Foundation
+classdef {{clss.name}} < handle
+  properties (SetAccess = private, Hidden = true)
+    ptr_ = 0; % handle to the underlying c++ clss instance
+  end
+
+  methods
+    % constructor
+    % asks the bridge to create a new instance and stores the returned handle
+    function this = {{clss.name}}(varargin)
+      this.ptr_ = {{clss.name}}Bridge('new', varargin{:});
+    end
+
+    % destructor
+    % the bridge expects the method name first and the instance handle
+    % second, exactly as for every other method call below
+    function delete(this)
+      {{clss.name}}Bridge('delete', this.ptr_);
+    end
+
+    {% for function in clss.methods %}
+    % {{function.__str__()}}
+    % forwards the call, the instance handle and all arguments to the bridge
+    function varargout = {{function.name}}(this, varargin)
+      [varargout{1:nargout}] = {{clss.name}}Bridge('{{function.name}}', this.ptr_, varargin{:});
+    end
+
+    {% endfor %}
+  end
+end
--- a/modules/matlab/generator/templates/template_cvmex_base.m
+++ b/modules/matlab/generator/templates/template_cvmex_base.m
@@ -0,0 +1,46 @@
+function mex(varargin)
+%CV.MEX compile MEX-function with OpenCV linkages
+%
+%   Usage:
+%       CV.MEX [options ...] file [file file ...]
+%
+%   Description:
+%       CV.MEX compiles one or more C/C++ source files into a shared-library
+%       called a mex-file. This function is equivalent to the builtin MEX
+%       routine, with the notable exception that it automatically resolves
+%       OpenCV includes, and links in the OpenCV libraries where appropriate.
+%       It also forwards the flags used to build OpenCV, so architecture-
+%       specific optimizations can be used.
+%
+%       CV.MEX is designed to be used in situations where the source(s) you
+%       are compiling contain OpenCV definitions. In such cases, it streamlines
+%       the finding and including of appropriate OpenCV libraries.
+%
+%   See also: mex
+%
+%   Copyright {{ time.strftime("%Y", time.localtime()) }} The OpenCV Foundation
+%
+
+  % The values below are substituted by the generator when this template is
+  % rendered; long lists are wrapped onto continuation lines.
+
+  % forward the OpenCV build flags (C++ only)
+  % the flags are appended to the existing CXXFLAGS rather than replacing them
+  EXTRA_FLAGS  = ['"CXXFLAGS="\$CXXFLAGS '...
+                  '{{ cv.flags | trim | wordwrap(60, false, '\'...\n                  \'') }}""'];
+
+  % add the OpenCV include dirs
+  INCLUDE_DIRS = {{ cv.include_dirs | split | cellarray | wordwrap(60, false, '...\n                  ') }};
+
+  % add the lib dir (singular in both build tree and install tree)
+  % NOTE(review): presumably the generator supplies this already prefixed
+  % with the linker search flag; confirm against the build configuration
+  LIB_DIR      = '{{ cv.lib_dir }}';
+
+  % add the OpenCV libs. Only the used libs will actually be linked
+  LIBS         = {{ cv.libs | split | cellarray | wordwrap(60, false, '...\n                  ') }};
+
+  % add the mex opts (usually at least -largeArrayDims)
+  OPTS         = {{ cv.opts | split | cellarray | wordwrap(60, false, '...\n                  ') }};
+
+  % merge all of the default options (EXTRA_FLAGS, LIBS, etc) and the options
+  % and files passed by the user (varargin) into a single cell array
+  % the user arguments come last in the merged list
+  merged       = [ {EXTRA_FLAGS}, INCLUDE_DIRS, {LIB_DIR}, LIBS, OPTS, varargin ];
+
+  % expand the merged argument list into the builtin mex utility
+  % NOTE(review): this function is itself named mex; confirm that the call
+  % below resolves to the builtin and not back into this file
+  mex(merged{:});
+end
--- a/modules/matlab/generator/templates/template_doc_base.m
+++ b/modules/matlab/generator/templates/template_doc_base.m
@@ -0,0 +1,62 @@
+{% import 'functional.cpp' as functional %}
+{{ ('CV.' + fun.name | upper + ' ' + doc.brief | stripTags) | comment(75, '%') | matlabURL }}
+%
+%   {{ functional.composeMatlab(fun) | upper }}
+{% if doc.long %}
+{{ doc.long | stripTags | qualify(fun.name) | comment(75, '%   ') | matlabURL }}
+{% endif %}
+%
+{# ----------------------- Returns --------------------- #}
+{# every emitted line must start with the comment marker, otherwise the  #}
+{# generated help block is cut short at the first bare line              #}
+{% if fun.rtp|void|not or fun.req|outputs|length or fun.opt|outputs|length %}
+%   Returns:
+{% if fun.rtp|void|not %}
+%      LVALUE
+{% endif %}
+{% for arg in fun.req|outputs + fun.opt|outputs %}
+{% set uname = arg.name | upper + ('_OUT' if arg.I else '') %}
+{% if arg.name in doc.params %}
+{{ (uname + ' ' + doc.params[arg.name]) | stripTags | comment(75, '%     ') }}
+{% else %}
+{{ uname | comment(75, '%     ') }}
+{% endif %}
+{% endfor %}
+%
+{% endif %}
+{# ----------------- Required Inputs ------------------- #}
+{# arguments without a parameter description are still listed by name #}
+{% if fun.req|inputs|length %}
+%   Required Inputs:
+{% for arg in fun.req|inputs %}
+{% set uname = arg.name | upper + ('_IN' if arg.O else '') %}
+{% if arg.name in doc.params %}
+{{ (uname + ' ' + doc.params[arg.name]) | stripTags | comment(75, '%     ') }}
+{% else %}
+{{ uname | comment(75, '%     ') }}
+{% endif %}
+{% endfor %}
+%
+{% endif %}
+{# ------------------ Optional Inputs ------------------- #}
+{% if fun.opt|inputs|length %}
+%   Optional Inputs:
+{% for arg in fun.opt|inputs %}
+{% set uname = arg.name | upper + ('_IN' if arg.O else '') + ' (default: ' + arg.default + ')' %}
+{% if arg.name in doc.params %}
+{{ (uname + ' ' + doc.params[arg.name]) | stripTags | comment(75, '%     ') }}
+{% else %}
+{{ uname | comment(75, '%     ') }}
+{% endif %}
+{% endfor %}
+%
+{% endif %}
+{# ---------------------- See also --------------------- #}
+{% if 'seealso' in doc %}
+%   See also: {% for item in doc['seealso'] %}
+cv.{{ item }}{% if not loop.last %}, {% endif %}
+{% endfor %}
+
+%
+{% endif %}
+{# ----------------------- Online ---------------------- #}
+{% set url = 'http://docs.opencv.org/modules/' + doc.module  + '/doc/' + (doc.file|filename) + '.html#' + (fun.name|slugify) %}
+%   Online docs: {{ url | matlabURL }}
+%   Copyright {{ time.strftime("%Y", time.localtime()) }} The OpenCV Foundation
+%
--- a/modules/matlab/generator/templates/template_function_base.cpp
+++ b/modules/matlab/generator/templates/template_function_base.cpp
@@ -0,0 +1,60 @@
+{% import 'functional.cpp' as functional %}
+/*
+ * file:   {{fun.name}}.cpp
+ * author: A trusty code generator
+ * date:   {{time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())}}
+ *
+ * This file was autogenerated, do not modify.
+ * See LICENSE for full modification and redistribution details.
+ * Copyright {{time.strftime("%Y", time.localtime())}} The OpenCV Foundation
+ */
+#include <string>
+#include <vector>
+#include <cassert>
+#include <exception>
+#include <opencv2/matlab/bridge.hpp>
+#include <opencv2/{{includes}}.hpp>
+using namespace cv;
+using namespace matlab;
+using namespace bridge;
+
+/*
+ * {{ fun.name }}
+ * {{ fun }}
+ * Gateway routine
+ *   nlhs - number of return arguments
+ *   plhs - pointers to return arguments
+ *   nrhs - number of input arguments
+ *   prhs - pointers to input arguments
+ *
+ * The plhs/prhs parameters are left unnamed when the wrapped function has
+ * no outputs/inputs respectively, since they are not referenced in that case.
+ */
+void mexFunction(int nlhs, mxArray*{% if fun|noutputs %} plhs[]{% else %}*{% endif %},
+                 int nrhs, const mxArray*{% if fun|ninputs %} prhs[]{% else %}*{% endif %}) {
+
+  {% if fun|ninputs %}
+  // parse the inputs
+  // the parser returns the supplied arguments arranged in the order the
+  // registered variant expects
+  ArgumentParser parser("{{fun.name}}");
+  parser.{{ functional.composeVariant(fun) }};
+  MxArrayVector sorted = parser.parse(MxArrayVector(prhs, prhs+nrhs));
+  {% endif %}
+
+  {% if fun|ninputs or fun|noutputs %}
+  // setup
+  {% if fun|ninputs %}
+  BridgeVector inputs(sorted.begin(), sorted.end());
+  {% endif -%}
+  {%- if fun|noutputs %}
+  BridgeVector outputs({{fun|noutputs}});
+  {% endif %}
+  {% endif %}
+
+  {{ functional.handleInputs(fun) }}
+  {{ functional.composeWithExceptionHandler(fun) }}
+  {{ functional.handleOutputs(fun) }}
+
+  {% if fun|noutputs %}
+  // push the outputs back to matlab
+  // at least one output is always written, even when nlhs is zero. The loop
+  // is additionally bounded by the number of outputs actually produced, so
+  // a caller requesting more return values than exist cannot make us read
+  // past the end of the outputs vector
+  const size_t requested = static_cast<size_t>(std::max(nlhs,1));
+  for (size_t n = 0; n < requested && n < outputs.size(); ++n) {
+    plhs[n] = outputs[n].toMxArray().releaseOwnership();
+  }
+  {% endif %}
+}
--- a/modules/matlab/generator/templates/template_map_base.m
+++ b/modules/matlab/generator/templates/template_map_base.m
@@ -0,0 +1,71 @@
+% ------------------------------------------------------------------------
+%                             <strong>OpenCV Toolbox</strong>
+%                  Matlab bindings for the OpenCV library
+% ------------------------------------------------------------------------
+%
+% The OpenCV Toolbox allows you to make calls to native OpenCV methods
+% and classes directly from within Matlab.
+%
+% <strong>PATHS</strong>
+% To call OpenCV methods from anywhere in your workspace, add the
+% directory containing this file to the path:
+%
+%     addpath(fileparts(which('cv')));
+%
+% The OpenCV Toolbox contains two important locations:
+%     cv.m - This file, containing OpenCV enums
+%     +cv/ - The directory containing the OpenCV methods and classes
+%
+% <strong>CALLING SYNTAX</strong>
+% To call an OpenCV method, class or enum, it must be prefixed with the
+% 'cv' qualifier. For example:
+%
+%     % perform a Fourier transform
+%     Xf = cv.dft(X, cv.DFT_COMPLEX_OUTPUT);
+%
+%     % create a VideoCapture object, and open a file
+%     camera = cv.VideoCapture();
+%     camera.open('/path/to/file');
+%
+% You can specify optional arguments by name, similar to how python
+% and many builtin Matlab functions work. For example, the cv.dft
+% method used above has an optional 'nonzeroRows' argument. If
+% you want to specify that, but keep the default 'flags' behaviour,
+% simply call the method as:
+%
+%     Xf = cv.dft(X, 'nonzeroRows', 7);
+%
+% <strong>HELP</strong>
+% Each method has its own help file containing information about the
+% arguments, return values, and what operation the method performs.
+% You can access this help information by typing:
+%
+%     help cv.methodName
+%
+% The full list of methods can be found by inspecting the +cv/
+% directory. Note that the methods available to you will depend
+% on which modules you configured OpenCV to build.
+%
+% <strong>DIAGNOSTICS</strong>
+% If you are having problems with the OpenCV Toolbox and need to send a
+% bug report to the OpenCV team, you can get a printout of diagnostic
+% information to submit along with your report by typing:
+%
+%     <a href="matlab: cv.buildInformation()">cv.buildInformation();</a>
+%
+% <strong>OTHER RESOURCES</strong>
+% OpenCV documentation online: <a href="matlab: web('http://docs.opencv.org', '-browser')">http://docs.opencv.org</a>
+% OpenCV issue tracker: <a href="matlab: web('http://code.opencv.org', '-browser')">http://code.opencv.org</a>
+% OpenCV Q&A: <a href="matlab: web('http://answers.opencv.org', '-browser')">http://answers.opencv.org</a>
+%
+% See also: cv.help, <a href="matlab: cv.buildInformation()">cv.buildInformation</a>
+%
+% Copyright {{ time.strftime("%Y", time.localtime()) }} The OpenCV Foundation
+%
+classdef cv
+    properties (Constant = true)
+    % one constant property is generated per (key, value) pair of the
+    % constants dictionary; each value is rendered by formatMatlabConstant
+    {% for key, val in constants.items() %}
+        {{key}} = {{val|formatMatlabConstant(constants)}};
+    {% endfor %}
+    end
+end
--- a/modules/matlab/include/opencv2/matlab/bridge.hpp
+++ b/modules/matlab/include/opencv2/matlab/bridge.hpp
@@ -0,0 +1,536 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this
+//  license. If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote
+//     products derived from this software without specific prior written
+//     permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and any express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular purpose
+// are disclaimed. In no event shall the Intel Corporation or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or business
+// interruption) however caused and on any theory of liability, whether in
+// contract, strict liability, or tort (including negligence or otherwise)
+// arising in any way out of the use of this software, even if advised of the
+// possibility of such damage.
+//
+////////////////////////////////////////////////////////////////////////////////
+#ifndef OPENCV_BRIDGE_HPP_
+#define OPENCV_BRIDGE_HPP_
+
+#include "mxarray.hpp"
+#include <vector>
+#include <string>
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/calib3d.hpp>
+
+namespace cv {
+namespace bridge {
+
+/*
+ * Custom typedefs
+ * Parsed names from the hdr_parser
+ */
+typedef std::vector<cv::Mat> vector_Mat;
+typedef std::vector<cv::Point> vector_Point;
+typedef std::vector<int> vector_int;
+typedef std::vector<float> vector_float;
+typedef std::vector<cv::String> vector_String;
+typedef std::vector<unsigned char> vector_uchar;
+typedef std::vector<std::vector<char> > vector_vector_char;
+typedef std::vector<std::vector<cv::DMatch> > vector_vector_DMatch;
+typedef std::vector<cv::Rect> vector_Rect;
+typedef std::vector<cv::KeyPoint> vector_KeyPoint;
+typedef cv::Ptr<cv::StereoBM> Ptr_StereoBM;
+typedef cv::Ptr<cv::StereoSGBM> Ptr_StereoSGBM;
+typedef cv::Ptr<cv::FeatureDetector> Ptr_FeatureDetector;
+typedef cv::Ptr<CLAHE> Ptr_CLAHE;
+
+
+// ----------------------------------------------------------------------------
+//                          PREDECLARATIONS
+// ----------------------------------------------------------------------------
+class Bridge;
+typedef std::vector<Bridge> BridgeVector;
+
+template <typename InputScalar, typename OutputScalar>
+void deepCopyAndTranspose(const cv::Mat& src, matlab::MxArray& dst);
+
+template <typename InputScalar, typename OutputScalar>
+void deepCopyAndTranspose(const matlab::MxArray& src, cv::Mat& dst);
+
+
+
+
+// ----------------------------------------------------------------------------
+//                                 BRIDGE
+// ----------------------------------------------------------------------------
+
+/*!
+ * @class Bridge
+ * @brief Type conversion class for converting OpenCV and native C++ types
+ *
+ * Bridge provides an interface for converting between OpenCV/C++ types
+ * to Matlab's mxArray format.
+ *
+ * Each type conversion requires three operators:
+ *    // conversion from ObjectType --> Bridge
+ *    Bridge& operator=(const ObjectType&);
+ *    // implicit conversion from Bridge --> ObjectType
+ *    operator ObjectType();
+ *    // explicit conversion from Bridge --> ObjectType
+ *    ObjectType toObjectType();
+ *
+ * The bridging class provides common conversions between OpenCV types,
+ * std and stl types to Matlab's mxArray format. By inheriting Bridge,
+ * you can add your own custom type conversions.
+ *
+ * Because Matlab uses a homogeneous storage type, all operations are provided
+ * relative to Matlab's type. That is, Bridge always stores an matlab::MxArray object
+ * and converts to and from other object types on demand.
+ *
+ * NOTE: for the explicit conversion function, the object name must be
+ * in UpperCamelCase, for example:
+ *    int --> toInt
+ *    my_object --> MyObject
+ *    my_Object --> MyObject
+ *    myObject  --> MyObject
+ * this is because the binding generator standardises the calling syntax.
+ *
+ * Bridge attempts to make as few assumptions as possible, however in
+ * some cases where 1-to-1 mappings don't exist, some assumptions are necessary.
+ * In particular:
+ *  - conversion of a 2-channel Mat to an mxArray will result in a complex
+ *    output
+ *  - conversion from multi-channel interleaved Mats will result in
+ *    multichannel planar mxArrays
+ *
+ */
+class Bridge {
+private:
+  // the wrapped Matlab array; every conversion reads from or writes to this
+  matlab::MxArray ptr_;
+public:
+  // bridges are default constructible
+  Bridge() {}
+  virtual ~Bridge() {}
+
+  // --------------------------------------------------------------------------
+  //                         Bridge Properties
+  // --------------------------------------------------------------------------
+  // true when the wrapped MxArray reports itself as empty
+  bool empty() const { return ptr_.empty(); }
+
+  /*! @brief unpack an object from Matlab into C++
+   *
+   * this function checks whether the given bridge is derived from an
+   * object in Matlab. If so, it converts it to a (platform dependent)
+   * pointer to the underlying C++ object.
+   *
+   * @param name the Matlab class name the wrapped array is required to have
+   * @return the object pointer reconstructed from the stored integer
+   *
+   * An error is raised through matlab::error / matlab::conditionalError when
+   * the class name does not match, when the stored integer type does not
+   * match the pointer width of this build, or when the pointer is NULL.
+   *
+   * NOTE! This function assumes that the C++ pointer is stored in inst_
+   * NOTE(review): the generated Matlab handle class keeps its handle in a
+   * property called ptr_, not inst_ -- confirm which name is intended.
+   */
+  template <typename Object>
+  Object* getObjectByName(const std::string& name) {
+    // check that the object is actually of correct type before unpacking
+    // TODO: Traverse class hierarchy?
+    if (!ptr_.isClass(name)) {
+      matlab::error(std::string("Expected class ").append(std::string(name))
+                        .append(" but was given ").append(ptr_.className()));
+    }
+    // get the instance field
+    matlab::MxArray inst = ptr_.field("inst_");
+    Object* obj = NULL;
+    // make sure the pointer is the correct size for the system
+    if (sizeof(void *) == 8 && inst.ID() == mxUINT64_CLASS) {
+      // 64-bit pointers
+      // TODO: Do we REALLY REALLY need to reinterpret_cast?
+      obj = reinterpret_cast<Object *>(inst.scalar<uint64_t>());
+    } else if (sizeof(void *) == 4 && inst.ID() == mxUINT32_CLASS) {
+      // 32-bit pointers
+      obj = reinterpret_cast<Object *>(inst.scalar<uint32_t>());
+    } else {
+      matlab::error("Incorrect pointer type stored for architecture");
+    }
+
+    // finally check if the object is NULL
+    matlab::conditionalError(obj, std::string("Object ").append(std::string(name)).append(std::string(" is NULL")));
+    return obj;
+  }
+
+
+  // --------------------------------------------------------------------------
+  //                           MATLAB TYPES
+  // --------------------------------------------------------------------------
+  // construction and assignment from raw mxArray pointers and MxArray
+  // objects; the constructors are deliberately implicit so that ranges of
+  // mxArray pointers can be converted element-wise into Bridge containers
+  Bridge& operator=(const mxArray* obj) { ptr_ = obj; return *this; }
+  Bridge& operator=(const matlab::MxArray& obj) { ptr_ = obj; return *this; }
+  Bridge(const matlab::MxArray& obj) : ptr_(obj) {}
+  Bridge(const mxArray* obj) : ptr_(obj) {}
+  // returns the wrapped MxArray by value
+  matlab::MxArray toMxArray() { return ptr_; }
+
+
+  // --------------------------------------------------------------------------
+  //                         MATRIX CONVERSIONS
+  // --------------------------------------------------------------------------
+  // the non-template overloads are defined after the class and pick the
+  // element type automatically via the InheritType specializations
+  Bridge& operator=(const cv::Mat& mat);
+  cv::Mat toMat() const;
+  operator cv::Mat() const { return toMat(); }
+
+  /*! @brief convert a cv::Mat into a new MxArray with element type Scalar
+   *
+   * Allocates an MxArray with the same rows, cols and channel count as mat
+   * and fills it via deepCopyAndTranspose, selecting the source element type
+   * from mat.depth(). An error is raised for any other depth.
+   */
+  template <typename Scalar>
+  static matlab::MxArray FromMat(const cv::Mat& mat) {
+    matlab::MxArray arr(mat.rows, mat.cols, mat.channels(), matlab::Traits<Scalar>::ScalarType);
+    switch (mat.depth()) {
+      case CV_8U:  deepCopyAndTranspose<uint8_t,  Scalar>(mat, arr); break;
+      case CV_8S:  deepCopyAndTranspose<int8_t,   Scalar>(mat, arr); break;
+      case CV_16U: deepCopyAndTranspose<uint16_t, Scalar>(mat, arr); break;
+      case CV_16S: deepCopyAndTranspose<int16_t,  Scalar>(mat, arr); break;
+      case CV_32S: deepCopyAndTranspose<int32_t,  Scalar>(mat, arr); break;
+      case CV_32F: deepCopyAndTranspose<float,    Scalar>(mat, arr); break;
+      case CV_64F: deepCopyAndTranspose<double,   Scalar>(mat, arr); break;
+      default: matlab::error("Attempted to convert from unknown class");
+    }
+    return arr;
+  }
+
+  /*! @brief convert the wrapped MxArray into a cv::Mat with element type Scalar
+   *
+   * Creates a Mat with the same rows, cols and channel count as the wrapped
+   * array and fills it via deepCopyAndTranspose, selecting the source element
+   * type from the array's class ID. Logical arrays are read as int8_t.
+   * An error is raised for any class ID not listed below.
+   */
+  template <typename Scalar>
+  cv::Mat toMat() const {
+    cv::Mat mat(ptr_.rows(), ptr_.cols(), CV_MAKETYPE(cv::DataType<Scalar>::type, ptr_.channels()));
+    switch (ptr_.ID()) {
+      case mxINT8_CLASS:    deepCopyAndTranspose<int8_t,   Scalar>(ptr_, mat); break;
+      case mxUINT8_CLASS:   deepCopyAndTranspose<uint8_t,  Scalar>(ptr_, mat); break;
+      case mxINT16_CLASS:   deepCopyAndTranspose<int16_t,  Scalar>(ptr_, mat); break;
+      case mxUINT16_CLASS:  deepCopyAndTranspose<uint16_t, Scalar>(ptr_, mat); break;
+      case mxINT32_CLASS:   deepCopyAndTranspose<int32_t,  Scalar>(ptr_, mat); break;
+      case mxUINT32_CLASS:  deepCopyAndTranspose<uint32_t, Scalar>(ptr_, mat); break;
+      case mxINT64_CLASS:   deepCopyAndTranspose<int64_t,  Scalar>(ptr_, mat); break;
+      case mxUINT64_CLASS:  deepCopyAndTranspose<uint64_t, Scalar>(ptr_, mat); break;
+      case mxSINGLE_CLASS:  deepCopyAndTranspose<float,    Scalar>(ptr_, mat); break;
+      case mxDOUBLE_CLASS:  deepCopyAndTranspose<double,   Scalar>(ptr_, mat); break;
+      case mxCHAR_CLASS:    deepCopyAndTranspose<char,     Scalar>(ptr_, mat); break;
+      case mxLOGICAL_CLASS: deepCopyAndTranspose<int8_t,   Scalar>(ptr_, mat); break;
+      default: matlab::error("Attempted to convert from unknown class");
+    }
+    return mat;
+  }
+
+
+
+  // --------------------------------------------------------------------------
+  //                         INTEGRAL TYPES
+  // --------------------------------------------------------------------------
+  // NOTE: every assignment operator in this section is a placeholder: the
+  // argument is discarded and the wrapped array is left unchanged.
+
+  // --------------------------- string  --------------------------------------
+  Bridge& operator=(const std::string& ) { return *this; }
+  std::string toString() {
+    return ptr_.toString();
+  }
+  operator std::string() { return toString(); }
+
+  // ---------------------------  bool   --------------------------------------
+  Bridge& operator=(const bool& ) { return *this; }
+  // placeholder: always yields false regardless of the wrapped value
+  bool toBool() { return 0; }
+  operator bool() { return toBool(); }
+
+  // --------------------------- double  --------------------------------------
+  Bridge& operator=(const double& ) { return *this; }
+  double toDouble() { return ptr_.scalar<double>(); }
+  operator double() { return toDouble(); }
+
+  // --------------------------- float  ---------------------------------------
+  Bridge& operator=(const float& ) { return *this; }
+  float toFloat() { return ptr_.scalar<float>(); }
+  operator float() { return toFloat(); }
+
+  // ---------------------------   int   --------------------------------------
+  Bridge& operator=(const int& ) { return *this; }
+  int toInt() { return ptr_.scalar<int>(); }
+  operator int() { return toInt(); }
+
+
+
+
+
+  // --------------------------------------------------------------------------
+  //                       CORE OPENCV TYPES
+  // --------------------------------------------------------------------------
+  // NOTE: except for RNG, the conversions in this section are placeholders:
+  // assignment discards its argument, and the to*() functions return a
+  // default-constructed object without consulting the wrapped array.
+
+  // --------------------------   Point  --------------------------------------
+  Bridge& operator=(const cv::Point& ) { return *this; }
+  cv::Point toPoint() const { return cv::Point(); }
+  operator cv::Point() const { return toPoint(); }
+
+  // --------------------------   Point2f  ------------------------------------
+  Bridge& operator=(const cv::Point2f& ) { return *this; }
+  cv::Point2f toPoint2f() const { return cv::Point2f(); }
+  operator cv::Point2f() const { return toPoint2f(); }
+
+  // --------------------------   Point2d  ------------------------------------
+  Bridge& operator=(const cv::Point2d& ) { return *this; }
+  cv::Point2d toPoint2d() const { return cv::Point2d(); }
+  operator cv::Point2d() const { return toPoint2d(); }
+
+  // --------------------------   Size  ---------------------------------------
+  Bridge& operator=(const cv::Size& ) { return *this; }
+  cv::Size toSize() const { return cv::Size(); }
+  operator cv::Size() const { return toSize(); }
+
+  // -------------------------- Moments  --------------------------------------
+  Bridge& operator=(const cv::Moments& ) { return *this; }
+  cv::Moments toMoments() const { return cv::Moments(); }
+  operator cv::Moments() const { return toMoments(); }
+
+  // --------------------------  Scalar  --------------------------------------
+  Bridge& operator=(const cv::Scalar& ) { return *this; }
+  cv::Scalar toScalar() { return cv::Scalar(); }
+  operator cv::Scalar() { return toScalar(); }
+
+  // -------------------------- Rect  -----------------------------------------
+  Bridge& operator=(const cv::Rect& ) { return *this; }
+  cv::Rect toRect() { return cv::Rect(); }
+  operator cv::Rect() { return toRect(); }
+
+  // ---------------------- RotatedRect ---------------------------------------
+  Bridge& operator=(const cv::RotatedRect& ) { return *this; }
+  cv::RotatedRect toRotatedRect() { return cv::RotatedRect(); }
+  operator cv::RotatedRect() { return toRotatedRect(); }
+
+  // ---------------------- TermCriteria --------------------------------------
+  Bridge& operator=(const cv::TermCriteria& ) { return *this; }
+  cv::TermCriteria toTermCriteria() { return cv::TermCriteria(); }
+  operator cv::TermCriteria() { return toTermCriteria(); }
+
+  // ----------------------      RNG     --------------------------------------
+  Bridge& operator=(const cv::RNG& ) { return *this; }
+  /*! @brief explicit conversion to cv::RNG()
+   *
+   * Converts a bridge object to a cv::RNG(). We explicitly assert that
+   * the object is an RNG in matlab space before attempting to dereference
+   * its pointer. The RNG is returned by value, i.e. as a copy of the
+   * object the pointer refers to.
+   */
+  cv::RNG toRNG() {
+    return (*getObjectByName<cv::RNG>("RNG"));
+  }
+  operator cv::RNG() { return toRNG(); }
+
+
+
+
+
+  // --------------------------------------------------------------------------
+  //                       OPENCV VECTOR TYPES
+  // --------------------------------------------------------------------------
+  // NOTE: all conversions in this section are placeholders: assignment
+  // discards its argument and the to*() functions return an empty vector.
+
+  // -------------------- vector_Mat ------------------------------------------
+  Bridge& operator=(const vector_Mat& ) { return *this; }
+  vector_Mat toVectorMat() { return vector_Mat(); }
+  operator vector_Mat() { return toVectorMat(); }
+
+  // --------------------------- vector_int  ----------------------------------
+  Bridge& operator=(const vector_int& ) { return *this; }
+  vector_int toVectorInt() { return vector_int(); }
+  operator vector_int() { return toVectorInt(); }
+
+  // --------------------------- vector_float  --------------------------------
+  Bridge& operator=(const vector_float& ) { return *this; }
+  vector_float toVectorFloat() { return vector_float(); }
+  operator vector_float() { return toVectorFloat(); }
+
+  // --------------------------- vector_Rect  ---------------------------------
+  Bridge& operator=(const vector_Rect& ) { return *this; }
+  vector_Rect toVectorRect() { return vector_Rect(); }
+  operator vector_Rect() { return toVectorRect(); }
+
+  // --------------------------- vector_KeyPoint  -----------------------------
+  Bridge& operator=(const vector_KeyPoint& ) { return *this; }
+  vector_KeyPoint toVectorKeyPoint() { return vector_KeyPoint(); }
+  operator vector_KeyPoint() { return toVectorKeyPoint(); }
+
+  // --------------------------- vector_String  -------------------------------
+  Bridge& operator=(const vector_String& ) { return *this; }
+  vector_String toVectorString() { return vector_String(); }
+  operator vector_String() { return toVectorString(); }
+
+  // ------------------------ vector_Point ------------------------------------
+  Bridge& operator=(const vector_Point& ) { return *this; }
+  vector_Point toVectorPoint() { return vector_Point(); }
+  operator vector_Point() { return toVectorPoint(); }
+
+  // ------------------------ vector_uchar ------------------------------------
+  Bridge& operator=(const vector_uchar& ) { return *this; }
+  vector_uchar toVectorUchar() { return vector_uchar(); }
+  operator vector_uchar() { return toVectorUchar(); }
+
+  // ------------------------ vector_vector_char ------------------------------
+  Bridge& operator=(const vector_vector_char& ) { return *this; }
+  vector_vector_char toVectorVectorChar() { return vector_vector_char(); }
+  operator vector_vector_char() { return toVectorVectorChar(); }
+
+  // ------------------------ vector_vector_DMatch ---------------------------
+  Bridge& operator=(const vector_vector_DMatch& ) { return *this; }
+  vector_vector_DMatch toVectorVectorDMatch() { return vector_vector_DMatch(); }
+  operator vector_vector_DMatch() { return toVectorVectorDMatch(); }
+
+
+
+
+  // --------------------------------------------------------------------------
+  //                       OPENCV COMPOUND TYPES
+  // --------------------------------------------------------------------------
+  // NOTE: all conversions in this section are placeholders: assignment
+  // discards its argument and the to*() functions return a
+  // default-constructed smart pointer.
+
+  // ---------------------------   Ptr_StereoBM   -----------------------------
+  Bridge& operator=(const Ptr_StereoBM& ) { return *this; }
+  Ptr_StereoBM toPtrStereoBM() { return Ptr_StereoBM(); }
+  operator Ptr_StereoBM() { return toPtrStereoBM(); }
+
+  // ---------------------------   Ptr_StereoSGBM   ---------------------------
+  Bridge& operator=(const Ptr_StereoSGBM& ) { return *this; }
+  Ptr_StereoSGBM toPtrStereoSGBM() { return Ptr_StereoSGBM(); }
+  operator Ptr_StereoSGBM() { return toPtrStereoSGBM(); }
+
+  // ---------------------------   Ptr_FeatureDetector   ----------------------
+  Bridge& operator=(const Ptr_FeatureDetector& ) { return *this; }
+  Ptr_FeatureDetector toPtrFeatureDetector() { return Ptr_FeatureDetector(); }
+  operator Ptr_FeatureDetector() { return toPtrFeatureDetector(); }
+
+  // ---------------------------   Ptr_CLAHE   --------------------------------
+  Bridge& operator=(const Ptr_CLAHE& ) { return *this; }
+  Ptr_CLAHE toPtrCLAHE() { return Ptr_CLAHE(); }
+  operator Ptr_CLAHE() { return toPtrCLAHE(); }
+}; // class Bridge
+
+
+
+// --------------------------------------------------------------------------
+//                           SPECIALIZATIONS
+// --------------------------------------------------------------------------
+
+/*!
+ * @brief template specialization for inheriting types
+ *
+ * This template specialization attempts to preserve the best mapping
+ * between OpenCV and Matlab types. Integer depths keep their width and
+ * signedness. Matlab uses double types almost universally, so both
+ * floating point depths (CV_32F and CV_64F) are converted to doubles.
+ *
+ * An error is raised for any other depth; the trailing return of an empty
+ * MxArray only exists to give every control path a return value.
+ *
+ * The function is marked inline because an explicit specialization is an
+ * ordinary function definition: without inline, including this header from
+ * more than one translation unit would define it multiple times.
+ */
+template <>
+inline matlab::MxArray Bridge::FromMat<matlab::InheritType>(const cv::Mat& mat) {
+  switch (mat.depth()) {
+    case CV_8U:  return FromMat<uint8_t>(mat);
+    case CV_8S:  return FromMat<int8_t>(mat);
+    case CV_16U: return FromMat<uint16_t>(mat);
+    case CV_16S: return FromMat<int16_t>(mat);
+    case CV_32S: return FromMat<int32_t>(mat);
+    case CV_32F: return FromMat<double>(mat); //NOTE: Matlab uses double as native type!
+    case CV_64F: return FromMat<double>(mat);
+    default: matlab::error("Attempted to convert from unknown class");
+  }
+  return matlab::MxArray();
+}
+
+/*!
+ * @brief template specialization for inheriting types
+ *
+ * This template specialization attempts to preserve the best mapping
+ * between Matlab and OpenCV types. OpenCV has poor support for double precision
+ * types, so all floating point types are cast to float. Unsigned 32-bit and
+ * 64-bit classes are mapped to the signed type of the same width, and both
+ * chars and logicals are mapped to a signed 8-bit value.
+ *
+ * An error is raised for any other class; the trailing return of an empty
+ * Mat only exists to give every control path a return value.
+ *
+ * The function is marked inline because an explicit specialization is an
+ * ordinary function definition: without inline, including this header from
+ * more than one translation unit would define it multiple times.
+ */
+template <>
+inline cv::Mat Bridge::toMat<matlab::InheritType>() const {
+  switch (ptr_.ID()) {
+    case mxINT8_CLASS:    return toMat<int8_t>();
+    case mxUINT8_CLASS:   return toMat<uint8_t>();
+    case mxINT16_CLASS:   return toMat<int16_t>();
+    case mxUINT16_CLASS:  return toMat<uint16_t>();
+    case mxINT32_CLASS:   return toMat<int32_t>();
+    case mxUINT32_CLASS:  return toMat<int32_t>();
+    case mxINT64_CLASS:   return toMat<int64_t>();
+    case mxUINT64_CLASS:  return toMat<int64_t>();
+    case mxSINGLE_CLASS:  return toMat<float>();
+    case mxDOUBLE_CLASS:  return toMat<float>(); //NOTE: OpenCV uses float as native type!
+    case mxCHAR_CLASS:    return toMat<int8_t>();
+    case mxLOGICAL_CLASS: return toMat<int8_t>();
+    default: matlab::error("Attempted to convert from unknown class");
+  }
+  return cv::Mat();
+}
+
+// Default Mat conversions: both directions pick the element type
+// automatically through the InheritType specializations. They are defined
+// out of class in a header, so they are marked inline to avoid multiple
+// definitions when the header is included from several translation units.
+inline Bridge& Bridge::operator=(const cv::Mat& mat) { ptr_ = FromMat<matlab::InheritType>(mat); return *this; }
+inline cv::Mat Bridge::toMat() const { return toMat<matlab::InheritType>(); }
+
+
+// ----------------------------------------------------------------------------
+//                            MATRIX TRANSPOSE
+// ----------------------------------------------------------------------------
+
+
+/*!
+ * @brief copy a cv::Mat into an MxArray, one transposed channel at a time
+ *
+ * The source is split into single-channel planes. Each plane is transposed
+ * and then converted to OutputScalar into a Mat header that wraps the
+ * matching plane of the destination's data buffer (planes are laid out one
+ * after another, each of cols*rows elements).
+ * An error is raised if rows, cols or channel count of in and out differ.
+ */
+template <typename InputScalar, typename OutputScalar>
+void deepCopyAndTranspose(const cv::Mat& in, matlab::MxArray& out) {
+  matlab::conditionalError(static_cast<size_t>(in.rows) == out.rows(), "Matrices must have the same number of rows");
+  matlab::conditionalError(static_cast<size_t>(in.cols) == out.cols(), "Matrices must have the same number of cols");
+  matlab::conditionalError(static_cast<size_t>(in.channels()) == out.channels(), "Matrices must have the same number of channels");
+
+  // separate the interleaved source into one Mat per channel
+  std::vector<cv::Mat> planes;
+  cv::split(in, planes);
+
+  for (size_t idx = 0; idx < out.channels(); ++idx) {
+    cv::Mat& plane = planes[idx];
+    cv::transpose(plane, plane);
+
+    // header over the idx-th destination plane; note the swapped dimensions
+    OutputScalar* const planeStart = out.real<OutputScalar>() + out.cols()*out.rows()*idx;
+    cv::Mat target(out.cols(), out.rows(), cv::DataType<OutputScalar>::type,
+      static_cast<void *>(planeStart));
+
+    plane.convertTo(target, cv::DataType<OutputScalar>::type);
+  }
+
+  //const InputScalar* inp = in.ptr<InputScalar>(0);
+  //OutputScalar* outp = out.real<OutputScalar>();
+  //gemt('R', out.rows(), out.cols(), inp, in.step1(), outp, out.rows());
+}
+
+/*!
+ * @brief copy an MxArray into a cv::Mat, one transposed channel at a time
+ *
+ * Each plane of the source buffer (cols*rows elements per channel) is
+ * wrapped in a Mat header, converted to OutputScalar into a fresh Mat,
+ * transposed, and collected. The collected planes are finally merged into
+ * the multi-channel destination.
+ * An error is raised if rows, cols or channel count of in and out differ.
+ */
+template <typename InputScalar, typename OutputScalar>
+void deepCopyAndTranspose(const matlab::MxArray& in, cv::Mat& out) {
+  matlab::conditionalError(in.rows() == static_cast<size_t>(out.rows), "Matrices must have the same number of rows");
+  matlab::conditionalError(in.cols() == static_cast<size_t>(out.cols), "Matrices must have the same number of cols");
+  matlab::conditionalError(in.channels() == static_cast<size_t>(out.channels()), "Matrices must have the same number of channels");
+
+  std::vector<cv::Mat> planes;
+  for (size_t idx = 0; idx < in.channels(); ++idx) {
+    // header over the idx-th source plane; the const_cast is only needed
+    // because the Mat constructor takes a non-const data pointer
+    InputScalar* const planeStart =
+      const_cast<InputScalar *>(in.real<InputScalar>() + in.cols()*in.rows()*idx);
+    cv::Mat source(in.cols(), in.rows(), cv::DataType<InputScalar>::type,
+      static_cast<void *>(planeStart));
+
+    cv::Mat converted;
+    source.convertTo(converted, cv::DataType<OutputScalar>::type);
+    cv::transpose(converted, converted);
+    planes.push_back(converted);
+  }
+  cv::merge(planes, out);
+
+  //const InputScalar* inp = in.real<InputScalar>();
+  //OutputScalar* outp = out.ptr<OutputScalar>(0);
+  //gemt('C', in.rows(), in.cols(), inp, in.rows(), outp, out.step1());
+}
+
+
+
+} // namespace bridge
+} // namespace cv
+
+#endif
--- a/modules/matlab/include/opencv2/matlab/map.hpp
+++ b/modules/matlab/include/opencv2/matlab/map.hpp
@@ -0,0 +1,91 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this
+//  license. If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote
+//     products derived from this software without specific prior written
+//     permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and any express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular purpose
+// are disclaimed. In no event shall the Intel Corporation or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or business
+// interruption) however caused and on any theory of liability, whether in
+// contract, strict liability, or tort (including negligence or otherwise)
+// arising in any way out of the use of this software, even if advised of the
+// possibility of such damage.
+//
+////////////////////////////////////////////////////////////////////////////////
+#ifndef OPENCV_MAP_HPP_
+#define OPENCV_MAP_HPP_
+
+// Standard headers are included at global scope: an #include placed inside
+// namespace matlab would declare the library's contents inside that namespace
+#if __cplusplus >= 201103L
+#include <unordered_map>
+#else
+#include <map>
+#include <stdexcept>
+#endif
+
+namespace matlab {
+#if __cplusplus >= 201103L
+
+// If we have C++11 support, we just want to use unordered_map
+template <typename KeyType, typename ValueType>
+using Map = std::unordered_map<KeyType, ValueType>;
+
+#else
+
+/*!
+ * @class Map
+ * @brief minimal stand-in for std::unordered_map on pre-C++11 compilers
+ *
+ * If we don't have C++11 support, we wrap std::map and expose only the
+ * operator[] and at() members, with the same signatures as unordered_map
+ */
+template <typename KeyType, typename ValueType>
+class Map {
+private:
+  std::map<KeyType, ValueType> map_;
+public:
+  /*! @brief map[key] = val; inserts a default-constructed value if key is absent */
+  ValueType& operator[] (const KeyType& k) {
+    return map_[k];
+  }
+
+  /*!
+   * @brief map.at(key) = val
+   * @throws std::out_of_range if the key is not present
+   */
+  ValueType& at(const KeyType& k) {
+    typename std::map<KeyType, ValueType>::iterator it = map_.find(k);
+    if (it == map_.end()) throw std::out_of_range("Key not found");
+    // the iterator refers to a (key, value) pair; only the value is returned
+    return it->second;
+  }
+
+  /*!
+   * @brief val = map.at(key)  (const)
+   * @throws std::out_of_range if the key is not present
+   */
+  const ValueType& at(const KeyType& k) const {
+    typename std::map<KeyType, ValueType>::const_iterator it = map_.find(k);
+    if (it == map_.end()) throw std::out_of_range("Key not found");
+    return it->second;
+  }
+};
+
+#endif
+
+// closed outside the conditional so that both branches terminate the namespace
+} // namespace matlab
+
+#endif
--- a/modules/matlab/include/opencv2/matlab/mxarray.hpp
+++ b/modules/matlab/include/opencv2/matlab/mxarray.hpp
@@ -0,0 +1,684 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this
+//  license. If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote
+//     products derived from this software without specific prior written
+//     permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and any express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular purpose
+// are disclaimed. In no event shall the Intel Corporation or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or business
+// interruption) however caused and on any theory of liability, whether in
+// contract, strict liability, or tort (including negligence or otherwise)
+// arising in any way out of the use of this software, even if advised of the
+// possibility of such damage.
+//
+////////////////////////////////////////////////////////////////////////////////
+#ifndef OPENCV_MXARRAY_HPP_
+#define OPENCV_MXARRAY_HPP_
+
+#include <mex.h>
+#include <stdint.h>
+#include <cstdarg>
+#include <algorithm>
+#include <string>
+#include <vector>
+#include <sstream>
+// Use the hashed set whenever C++11 is available (the same test that guards
+// the MxArray move constructor below), otherwise fall back to std::set
+#if __cplusplus >= 201103L
+#include <unordered_set>
+typedef std::unordered_set<std::string> StringSet;
+#else
+#include <set>
+typedef std::set<std::string> StringSet;
+#endif
+
+/*
+ * All recent versions of Matlab ship with the MKL library, which contains
+ * a BLAS extension called mkl_?omatcopy(). This defines an out-of-place
+ * copy and transpose operation.
+ *
+ * The MKL library is in ${MATLAB_ROOT}/bin/${MATLAB_MEXEXT}/libmkl...
+ * Matlab does not ship headers for the MKL functions, so their prototypes
+ * are meant to be declared in the extern "C" block below (which is
+ * currently empty).
+ *
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+
+namespace matlab {
+// ----------------------------------------------------------------------------
+//                          PREDECLARATIONS
+// ----------------------------------------------------------------------------
+class MxArray;
+typedef std::vector<MxArray> MxArrayVector;
+
+/*!
+ * @brief raise an error unless a condition holds
+ *
+ * Conditional wrapper for mexErrMsgTxt. If expr is false, the text
+ * "condition failed: " followed by str is handed to mexErrMsgTxt, which
+ * raises the error and returns control to Matlab. If expr is true this
+ * function does nothing
+ *
+ * @param expr the condition that is expected to hold
+ * @param str  description of the condition, appended to the error message
+ */
+static void conditionalError(bool expr, const std::string& str) {
+  if (expr) return;
+  const std::string message = std::string("condition failed: ") + str;
+  mexErrMsgTxt(message.c_str());
+}
+
+/*!
+ * @brief raise an error unconditionally
+ *
+ * Thin wrapper which forwards the message to mexErrMsgTxt
+ *
+ * @param str the error message
+ */
+static void error(const std::string& str) {
+  const char* const message = str.c_str();
+  mexErrMsgTxt(message);
+}
+
+
+// ----------------------------------------------------------------------------
+//                            MATLAB TRAITS
+// ----------------------------------------------------------------------------
+class DefaultTraits {};  // tag type: default argument of Traits, so Traits<> names the primary template
+class InheritType {};  // tag type with its own Traits specialization ("Inherited type")
+
+template<typename _Tp = DefaultTraits> class Traits {  // primary template: used for every type without a specialization
+public:
+  static const mxClassID ScalarType = mxUNKNOWN_CLASS;  // an unspecialized type has no known Matlab class
+  static const mxComplexity Complex = mxCOMPLEX;  // complexity flags; MxArray refers to them as Traits<>::Real
+  static const mxComplexity Real    = mxREAL;
+  static std::string ToString()  { return "Unknown/Unsupported"; }  // printable name of the type
+};
+// bool maps to mxLOGICAL_CLASS (each specialization provides only ScalarType and ToString)
+template<> class Traits<bool> {
+public:
+  static const mxClassID ScalarType = mxLOGICAL_CLASS;
+  static std::string ToString()  { return "boolean"; }
+};
+// uint8_t maps to mxUINT8_CLASS
+template<> class Traits<uint8_t> {
+public:
+  static const mxClassID ScalarType = mxUINT8_CLASS;
+  static std::string ToString()  { return "uint8_t"; }
+};
+// int8_t maps to mxINT8_CLASS
+template<> class Traits<int8_t> {
+public:
+  static const mxClassID ScalarType = mxINT8_CLASS;
+  static std::string ToString()  { return "int8_t"; }
+};
+// uint16_t maps to mxUINT16_CLASS
+template<> class Traits<uint16_t> {
+public:
+  static const mxClassID ScalarType = mxUINT16_CLASS;
+  static std::string ToString()  { return "uint16_t"; }
+};
+// int16_t maps to mxINT16_CLASS
+template<> class Traits<int16_t> {
+public:
+  static const mxClassID ScalarType = mxINT16_CLASS;
+  static std::string ToString()  { return "int16_t"; }
+};
+// uint32_t maps to mxUINT32_CLASS
+template<> class Traits<uint32_t> {
+public:
+  static const mxClassID ScalarType = mxUINT32_CLASS;
+  static std::string ToString()  { return "uint32_t"; }
+};
+// int32_t maps to mxINT32_CLASS
+template<> class Traits<int32_t> {
+public:
+  static const mxClassID ScalarType = mxINT32_CLASS;
+  static std::string ToString()  { return "int32_t"; }
+};
+// uint64_t maps to mxUINT64_CLASS
+template<> class Traits<uint64_t> {
+public:
+  static const mxClassID ScalarType = mxUINT64_CLASS;
+  static std::string ToString()  { return "uint64_t"; }
+};
+// int64_t maps to mxINT64_CLASS
+template<> class Traits<int64_t> {
+public:
+  static const mxClassID ScalarType = mxINT64_CLASS;
+  static std::string ToString()  { return "int64_t"; }
+};
+// float maps to mxSINGLE_CLASS
+template<> class Traits<float> {
+public:
+  static const mxClassID ScalarType = mxSINGLE_CLASS;
+  static std::string ToString()  { return "float"; }
+};
+// double maps to mxDOUBLE_CLASS
+template<> class Traits<double> {
+public:
+  static const mxClassID ScalarType = mxDOUBLE_CLASS;
+  static std::string ToString()  { return "double"; }
+};
+// char maps to mxCHAR_CLASS
+template<> class Traits<char> {
+public:
+  static const mxClassID ScalarType = mxCHAR_CLASS;
+  static std::string ToString()  { return "char"; }
+};
+// inherited type: only ToString() is provided, this specialization has no ScalarType member
+template<> class Traits<matlab::InheritType> {
+public:
+  static std::string ToString()  { return "Inherited type"; }
+};
+
+
+
+// ----------------------------------------------------------------------------
+//                                MXARRAY
+// ----------------------------------------------------------------------------
+
+
+/*!
+ * @class MxArray
+ * @brief A thin wrapper around Matlab's mxArray types
+ *
+ * MxArray provides a thin object oriented wrapper around Matlab's
+ * native mxArray type which exposes most of the functionality of the
+ * Matlab interface, but in a more C++ manner. MxArray objects are scoped,
+ * so you can freely create and destroy them without worrying about memory
+ * management. If you wish to pass the underlying mxArray* representation
+ * back to Matlab as an lvalue, see the releaseOwnership() method
+ *
+ * MxArrays can be directly converted into OpenCV mat objects and std::string
+ * objects, since there is a natural mapping between these types. More
+ * complex types are mapped through the Bridge which does custom conversions
+ * such as MxArray --> cv::Keypoints, etc
+ */
+class MxArray {
+private:
+  mxArray* ptr_;  // the wrapped Matlab array
+  bool owns_;     // true if this object must destroy ptr_ in dealloc()
+
+  /*!
+   * @brief swap all members of this and other
+   *
+   * the swap method is used by the assignment and move constructors
+   * to swap the members of two MxArrays, leaving both in destructible states
+   */
+  friend void swap(MxArray& first, MxArray& second) {
+    using std::swap;
+    swap(first.ptr_,  second.ptr_);
+    swap(first.owns_, second.owns_);
+  }
+
+  /*!
+   * @brief destroy the wrapped array if, and only if, it is owned
+   *
+   * After an owned array has been destroyed the object holds a NULL,
+   * non-owned pointer. A non-owned array is left untouched
+   */
+  void dealloc() {
+    if (owns_ && ptr_) { mxDestroyArray(ptr_); ptr_ = NULL; owns_ = false; }
+  }
+public:
+  // --------------------------------------------------------------------------
+  //                              CONSTRUCTORS
+  // --------------------------------------------------------------------------
+  /*!
+   * @brief default constructor
+   *
+   * Construct a valid 0x0 matrix (so all other methods do not need validity checks)
+   */
+  MxArray() : ptr_(mxCreateDoubleMatrix(0, 0, matlab::Traits<>::Real)), owns_(true) {}
+
+  /*!
+   * @brief destructor
+   *
+   * The destructor deallocates any data allocated by mxCreate* methods only
+   * if the object is owned
+   */
+  virtual ~MxArray() {
+    dealloc();
+  }
+
+  /*!
+   * @brief inheriting constructor
+   *
+   * Inherit an mxArray from Matlab. Don't claim ownership of the array,
+   * just encapsulate it. The constness of the incoming pointer is cast away,
+   * so the non-const accessors of this class can reach the inherited data
+   */
+  MxArray(const mxArray* ptr) : ptr_(const_cast<mxArray *>(ptr)), owns_(false) {}
+
+  /*!
+   * @brief inheriting assignment
+   *
+   * Release whatever is currently held (destroying it if owned) and
+   * encapsulate ptr without claiming ownership of it
+   */
+  MxArray& operator=(const mxArray* ptr) {
+    dealloc();
+    ptr_ = const_cast<mxArray *>(ptr);
+    owns_ = false;
+    return *this;
+  }
+
+  /*!
+   * @brief explicit typed constructor
+   *
+   * This constructor explicitly creates an owned m x n x k MxArray of the
+   * given class ID and complexity (real by default).
+   */
+  MxArray(size_t m, size_t n, size_t k, mxClassID id, mxComplexity com = matlab::Traits<>::Real)
+      : ptr_(NULL), owns_(true) {
+    mwSize dims[] = { static_cast<mwSize>(m), static_cast<mwSize>(n), static_cast<mwSize>(k) };
+    ptr_ = mxCreateNumericArray(3, dims, id, com);
+  }
+
+  /*!
+   * @brief explicit tensor constructor
+   *
+   * Explicitly construct a tensor of given size and type. Since constructors cannot
+   * be explicitly templated, this is a static factory method
+   */
+  template <typename Scalar>
+  static MxArray Tensor(size_t m, size_t n, size_t k=1) {
+    return MxArray(m, n, k, matlab::Traits<Scalar>::ScalarType);
+  }
+
+  /*!
+   * @brief explicit matrix constructor
+   *
+   * Explicitly construct a matrix of given size and type. Since constructors cannot
+   * be explicitly templated, this is a static factory method
+   */
+  template <typename Scalar>
+  static MxArray Matrix(size_t m, size_t n) {
+    return MxArray(m, n, 1, matlab::Traits<Scalar>::ScalarType);
+  }
+
+  /*!
+   * @brief explicit vector constructor
+   *
+   * Explicitly construct a vector of given size and type. Since constructors cannot
+   * be explicitly templated, this is a static factory method
+   */
+  template <typename Scalar>
+  static MxArray Vector(size_t m) {
+    return MxArray(m, 1, 1, matlab::Traits<Scalar>::ScalarType);
+  }
+
+  /*!
+   * @brief explicit scalar constructor
+   *
+   * Explicitly construct a scalar of given type, holding value. Since
+   * constructors cannot be explicitly templated, this is a static factory method
+   */
+  template <typename ScalarType>
+  static MxArray Scalar(ScalarType value = 0) {
+    MxArray s(1, 1, 1, matlab::Traits<ScalarType>::ScalarType);
+    s.real<ScalarType>()[0] = value;
+    return s;
+  }
+
+  /*!
+   * @brief copy constructor
+   *
+   * All copies are deep copies, and the copy always owns its data, even if
+   * the source did not. If you have a C++11 compatible compiler, prefer
+   * move construction to copy construction
+   *
+   * NOTE(review): other.ptr_ can be NULL when other wraps a NULL mxArray*;
+   * confirm that mxDuplicateArray tolerates a NULL argument
+   */
+  MxArray(const MxArray& other) : ptr_(mxDuplicateArray(other.ptr_)), owns_(true) {}
+
+  /*!
+   * @brief copy-and-swap assignment
+   *
+   * This assignment operator uses the copy and swap idiom to provide a strong
+   * exception guarantee when swapping two objects.
+   *
+   * Note in particular that the other MxArray is passed by value, thus invoking
+   * the copy constructor which performs a deep copy of the input. The members of
+   * this and other are then swapped
+   */
+  MxArray& operator=(MxArray other) {
+    swap(*this, other);
+    return *this;
+  }
+#if __cplusplus >= 201103L
+  /*!
+   * @brief C++11 move constructor
+   *
+   * When C++11 support is available, move construction is used to move returns
+   * out of functions, etc. This is much faster than copy construction, since the
+   * moved-from object is simply left holding a default constructed MxArray,
+   * which is of size 0 x 0.
+   */
+  MxArray(MxArray&& other) : MxArray() {
+    swap(*this, other);
+  }
+#endif
+
+  /*!
+   * @brief release ownership to allow return into Matlab workspace
+   *
+   * MxArray is not directly convertible back to mxArray types through assignment
+   * because the MxArray may have been allocated on the free store, making it impossible
+   * to know whether the returned pointer will be released by someone else or not.
+   *
+   * Since Matlab requires mxArrays be passed back into the workspace, the only way
+   * to achieve that is through this function, which explicitly releases ownership
+   * of the object, assuming the Matlab interpreter receiving the object will delete
+   * it at a later time. The MxArray keeps pointing at the released array.
+   *
+   * e.g.
+   * {
+   *    MxArray A = MxArray::Matrix<double>(5, 5);  // allocates memory
+   *    MxArray B = MxArray::Matrix<double>(5, 5);  // ditto
+   *    plhs[0] = A;                                // not allowed!!
+   *    plhs[0] = A.releaseOwnership();             // makes explicit that ownership is being released
+   * } // end of scope. B is released, A isn't
+   *
+   */
+  mxArray* releaseOwnership() {
+    owns_ = false;
+    return ptr_;
+  }
+
+  /*! @brief non-owning wrapper around the field called name of the first struct element */
+  MxArray field(const std::string& name) { return MxArray(mxGetField(ptr_, 0, name.c_str())); }
+
+  /*! @brief pointer to the real data, interpreted as Scalar */
+  template <typename Scalar>
+  Scalar* real() { return static_cast<Scalar *>(mxGetData(ptr_)); }
+
+  /*! @brief pointer to the imaginary data, interpreted as Scalar */
+  template <typename Scalar>
+  Scalar* imag() { return static_cast<Scalar *>(mxGetImagData(ptr_)); }
+
+  /*! @brief pointer to the real data, interpreted as Scalar (const) */
+  template <typename Scalar>
+  const Scalar* real() const { return static_cast<const Scalar *>(mxGetData(ptr_)); }
+
+  /*! @brief pointer to the imaginary data, interpreted as Scalar (const) */
+  template <typename Scalar>
+  const Scalar* imag() const { return static_cast<const Scalar *>(mxGetImagData(ptr_)); }
+
+  /*! @brief the first element of the real data, interpreted as Scalar */
+  template <typename Scalar>
+  Scalar scalar() const { return static_cast<Scalar *>(mxGetData(ptr_))[0]; }
+
+  /*!
+   * @brief convert a Matlab char array to a std::string
+   *
+   * Raises an error through conditionalError if the array is not a string.
+   * The returned string always has size() characters
+   */
+  std::string toString() const {
+    conditionalError(isString(), "Attempted to convert non-string type to string");
+    // mxGetString also writes a terminating NUL, so it is given a writable
+    // buffer with one extra element rather than the string's own storage
+    std::vector<char> buffer(size() + 1, '\0');
+    mxGetString(ptr_, &buffer[0], buffer.size());
+    return std::string(&buffer[0], buffer.size() - 1);
+  }
+
+  /*! @brief total number of elements */
+  size_t size() const { return mxGetNumberOfElements(ptr_); }
+  /*! @brief true if the array has no elements */
+  bool empty() const { return size() == 0; }
+  /*! @brief extent of the first dimension */
+  size_t rows() const { return mxGetDimensions(ptr_)[0]; }
+  /*! @brief extent of the second dimension */
+  size_t cols() const { return mxGetDimensions(ptr_)[1]; }
+  /*! @brief extent of the third dimension, or 1 if the array has fewer than three dimensions */
+  size_t channels() const { return (mxGetNumberOfDimensions(ptr_) > 2) ? mxGetDimensions(ptr_)[2] : 1; }
+  // type queries, each forwarding to the mxIs* / mxGet* function of the same name
+  bool isComplex() const { return mxIsComplex(ptr_); }
+  bool isNumeric() const { return mxIsNumeric(ptr_); }
+  bool isLogical() const { return mxIsLogical(ptr_); }
+  bool isString() const { return mxIsChar(ptr_); }
+  bool isCell() const { return mxIsCell(ptr_); }
+  bool isStructure() const { return mxIsStruct(ptr_); }
+  bool isClass(const std::string& name) const { return mxIsClass(ptr_, name.c_str()); }
+  std::string className() const { return std::string(mxGetClassName(ptr_)); }
+  mxClassID ID() const { return mxGetClassID(ptr_); }
+
+};
+
+
+// ----------------------------------------------------------------------------
+//                           ARGUMENT PARSER
+// ----------------------------------------------------------------------------
+
+/*! @class ArgumentParser
+ *  @brief parses inputs to a method and resolves the argument names.
+ *
+ * The ArgumentParser resolves the inputs to a method. It checks that all
+ * required arguments are specified and also allows named optional arguments.
+ * For example, the C++ function:
+ *    void randn(Mat& mat, Mat& mean=Mat(), Mat& std=Mat());
+ * could be called in Matlab using any of the following signatures:
+ * \code
+ *    out = randn(in);
+ *    out = randn(in, 0, 1);
+ *    out = randn(in, 'mean', 0, 'std', 1);
+ * \endcode
+ *
+ * ArgumentParser also enables function overloading by allowing users
+ * to add variants to a method. For example, there may be two C++ sum() methods:
+ * \code
+ *    double sum(Mat& mat);     % sum elements of a matrix
+ *    Mat sum(Mat& A, Mat& B);  % add two matrices
+ * \endcode
+ *
+ * by adding two variants to ArgumentParser, the correct underlying sum
+ * method can be called. If the function call is ambiguous, the
+ * ArgumentParser will fail with an error message.
+ *
+ * The previous example could be parsed as:
+ * \code
+ *    // set up the Argument parser
+ *    ArgumentParser arguments("sum");
+ *    arguments.addVariant("elementwise", 1);
+ *    arguments.addVariant("matrix", 2);
+ *
+ *    // parse the arguments
+ *    std::vector<MxArray> inputs;
+ *    inputs = arguments.parse(std::vector<MxArray>(prhs, prhs+nrhs));
+ *
+ *    // if we get here, one unique variant is valid
+ *    if (arguments.variantIs("elementwise")) {
+ *      // call elementwise sum()
+ *    }
+ * \endcode
+ */
+class ArgumentParser {
+private:
+  class Variant;
+  typedef std::string String;
+  typedef std::vector<std::string> StringVector;
+  typedef std::vector<size_t> IndexVector;
+  typedef std::vector<Variant> VariantVector;
+
+  /*! @class Variant
+   * @brief Describes a variant of arguments to a method
+   *
+   * When addVariant() is called on an instance to ArgumentParser, this class
+   * holds the information that describes that variant. The parse() method
+   * of ArgumentParser then attempts to match a Variant, given a set of
+   * inputs for a method invocation.
+   */
+  class Variant {
+  private:
+    String name_;         // name identifying the variant
+    size_t Nreq_;         // number of required arguments
+    size_t Nopt_;         // number of optional arguments
+    StringVector keys_;   // names of the optional arguments
+    IndexVector order_;   // order_[n] is the index of the raw input that fills argument n
+    bool valid_;          // false once the inputs seen so far cannot match this variant
+    size_t nparsed_;      // number of raw inputs consumed so far (keys and values)
+    size_t nkeys_;        // how many of the consumed inputs were keys
+    size_t working_opt_;  // position in keys_ of the most recently looked up key
+    bool expecting_val_;  // true directly after a key, until its value has been parsed
+    bool using_named_;    // true once any key has been parsed
+    /*! @brief position of key in keys_, or keys_.size() if it is not present */
+    size_t find(const String& key) const {
+      return std::find(keys_.begin(), keys_.end(), key) - keys_.begin();
+    }
+  public:
+    /*! @brief default constructor: an empty, invalid variant with every member initialized */
+    Variant() : Nreq_(0), Nopt_(0), valid_(false), nparsed_(0), nkeys_(0),
+      working_opt_(0), expecting_val_(false), using_named_(false) {}
+    /*! @brief construct a new variant spec
+     *
+     * Every entry of the order mapping starts at Nreq+2*Nopt, a value that
+     * sortArguments() skips unless it is a valid index into the inputs
+     */
+    Variant(const String& name, size_t Nreq, size_t Nopt, const StringVector& keys)
+      : name_(name), Nreq_(Nreq), Nopt_(Nopt), keys_(keys),
+      order_(Nreq+Nopt, Nreq+2*Nopt), valid_(true), nparsed_(0), nkeys_(0),
+      working_opt_(0), expecting_val_(false), using_named_(false) {}
+    /*! @brief the name of the variant */
+    String name() const { return name_; }
+    /*! @brief return the total number of arguments the variant can take */
+    size_t size() const { return Nreq_ + Nopt_; }
+    /*! @brief has the variant been fulfilled? */
+    bool fulfilled() const { return (valid_ && nparsed_ >= Nreq_ && !expecting_val_); }
+    /*! @brief is the variant in a valid state (though not necessarily fulfilled) */
+    bool valid() const { return valid_; }
+    /*! @brief check if the named argument exists in the variant */
+    bool exist(const String& key) const { return find(key) != keys_.size(); }
+    /*! @brief retrieve the order mapping raw inputs to their position in the variant */
+    const IndexVector& order() const { return order_; }
+    size_t order(size_t n) const { return order_[n]; }
+    /*! @brief attempt to parse the next argument as a value
+     *
+     * @return whether the variant is still valid afterwards
+     */
+    bool parseNextAsValue() {
+      if (!valid_) {}
+      // a bare value after named arguments, or more values than arguments
+      else if ((using_named_ && !expecting_val_) || (nparsed_-nkeys_ == Nreq_+Nopt_)) { valid_ = false; }
+      // positional value for a required argument
+      else if (nparsed_ < Nreq_) { order_[nparsed_] = nparsed_; }
+      // positional value for an optional argument
+      else if (!using_named_) { order_[nparsed_] = nparsed_; }
+      // value belonging to the key parsed just before it
+      else if (using_named_ && expecting_val_) { order_[Nreq_ + working_opt_] = nparsed_; }
+      nparsed_++;
+      expecting_val_ = false;
+      return valid_;
+    }
+    /*! @brief attempt to parse the next argument as a name (key)
+     *
+     * @return whether the variant is still valid afterwards
+     */
+    bool parseNextAsKey(const String& key) {
+      if (!valid_) {}
+      // keys may only follow the required arguments, and not exceed the argument count
+      else if ((nparsed_ < Nreq_) || (nparsed_-nkeys_ == Nreq_+Nopt_)) { valid_ = false; }
+      // a key directly after another key
+      else if (using_named_ && expecting_val_) { valid_ = false; }
+      // unknown key
+      else if ((working_opt_ = find(key)) == keys_.size()) { valid_ = false; }
+      else { using_named_ = true; expecting_val_ = true; nkeys_++; nparsed_++; }
+      return valid_;
+    }
+    /*! @brief render the variant's call signature, underlined and labelled
+     *
+     * The first line is the signature, the second underlines the required
+     * and optional argument groups, and the third labels each group
+     */
+    String toString(const String& method_name="f") const {
+      int req_begin = 0, req_end = 0, opt_begin = 0, opt_end = 0;
+      std::ostringstream s;
+      // f(...)
+      s << method_name << "(";
+      // required arguments
+      req_begin = s.str().size();
+      for (size_t n = 0; n < Nreq_; ++n) { s << "src" << n+1 << (n != Nreq_-1 ? ", " : ""); }
+      req_end = s.str().size();
+      if (Nreq_ && Nopt_) s << ", ";
+      // optional arguments (the separator follows every key except the last one)
+      opt_begin = s.str().size();
+      for (size_t n = 0; n < keys_.size(); ++n) { s << "'" << keys_[n] << "', " << keys_[n] << (n+1 != keys_.size() ? ", " : ""); }
+      opt_end = s.str().size();
+      s << ");";
+      if (Nreq_ + Nopt_ == 0) return s.str();
+      // underscores
+      String under = String(req_begin, ' ') + String(req_end-req_begin, '-')
+                   + String(std::max(opt_begin-req_end,0), ' ') + String(opt_end-opt_begin, '-');
+      s << "\n" << under;
+      // required and optional sets
+      String req_set(req_end-req_begin, ' ');
+      String opt_set(opt_end-opt_begin, ' ');
+      if (!req_set.empty() && req_set.size() < 8) req_set.replace((req_set.size()-3)/2, 3, "req");
+      if (req_set.size() > 7) req_set.replace((req_set.size()-8)/2, 8, "required");
+      if (!opt_set.empty() && opt_set.size() < 8) opt_set.replace((opt_set.size()-3)/2, 3, "opt");
+      if (opt_set.size() > 7) opt_set.replace((opt_set.size()-8)/2, 8, "optional");
+      String set = String(req_begin, ' ') + req_set + String(std::max(opt_begin-req_end,0), ' ') + opt_set;
+      s << "\n" << set;
+      return s.str();
+    }
+  };
+  /*! @brief given an input and output vector of arguments, and a variant spec, sort
+   *
+   * out receives one element per argument of the variant. Arguments that were
+   * supplied are swapped out of in; the remaining ones stay default constructed
+   */
+  void sortArguments(Variant& v, MxArrayVector& in, MxArrayVector& out) {
+    // allocate the output array with ALL arguments
+    out.resize(v.size());
+    // reorder the inputs based on the variant ordering
+    for (size_t n = 0; n < v.size(); ++n) {
+      // arguments that were never supplied keep an out-of-range index
+      if (v.order(n) >= in.size()) continue;
+      swap(in[v.order(n)], out[n]);
+    }
+  }
+  VariantVector variants_;  // every variant registered through addVariant()
+  String valid_;            // name of the variant matched by the last parse()
+  String method_name_;      // method name used when printing the variants
+public:
+  ArgumentParser(const String& method_name) : method_name_(method_name) {}
+
+  /*! @brief add a function call variant to the parser
+   *
+   * Adds a function-call signature to the parser. The function call *must* be
+   * unique either in its number of arguments, or in the named-syntax.
+   * Currently this function does not check whether that invariant stands true.
+   *
+   * This function is variadic: nopt C strings (const char*) naming the
+   * optional arguments must follow nopt. It should be called as follows:
+   *  addVariant("name", 2, 2, "opt_1_name", "opt_2_name");
+   */
+  void addVariant(const String& name, size_t nreq, size_t nopt = 0, ...) {
+    StringVector keys;
+    va_list opt;
+    va_start(opt, nopt);
+    for (size_t n = 0; n < nopt; ++n) keys.push_back(va_arg(opt, const char*));
+    // every va_start must be matched by a va_end before the function returns
+    va_end(opt);
+    addVariant(name, nreq, nopt, keys);
+  }
+  /*! @brief add a function call variant whose optional argument names are given as a vector */
+  void addVariant(const String& name, size_t nreq, size_t nopt, StringVector keys) {
+    variants_.push_back(Variant(name, nreq, nopt, keys));
+  }
+
+  /*! @brief check if the valid variant is the key name */
+  bool variantIs(const String& name) const {
+    return name.compare(valid_) == 0;
+  }
+
+  /*! @brief parse a vector of input arguments
+   *
+   * This method parses a vector of input arguments, attempting to match them
+   * to a Variant spec. For each input, the method attempts to cull any
+   * Variants which don't match the given inputs so far.
+   *
+   * Once all inputs have been parsed, if there is one unique spec remaining,
+   * the output MxArray vector gets populated with the arguments, with named
+   * arguments removed. Any optional arguments that have not been encountered
+   * are set to an empty array.
+   *
+   * If multiple variants or no variants match the given call, an error
+   * message is emitted
+   *
+   * Note that the matched arguments are swapped out of inputs, so the
+   * caller's vector is modified even though it is passed by const reference
+   */
+  MxArrayVector parse(const MxArrayVector& inputs) {
+    // allocate the outputs
+    String variant_string;
+    MxArrayVector outputs;
+    VariantVector candidates = variants_;
+
+    // iterate over the inputs, attempting to match a variant
+    for (MxArrayVector::const_iterator input = inputs.begin(); input != inputs.end(); ++input) {
+      String name = input->isString() ? input->toString() : String();
+      for (VariantVector::iterator candidate = candidates.begin(); candidate < candidates.end(); ++candidate) {
+        candidate->exist(name) ? candidate->parseNextAsKey(name) : candidate->parseNextAsValue();
+      }
+    }
+
+    // make sure the candidates have been fulfilled. erase() already returns
+    // the element after the erased one, so only advance when nothing was erased
+    for (VariantVector::iterator candidate = candidates.begin(); candidate != candidates.end(); ) {
+      if (candidate->fulfilled()) ++candidate;
+      else candidate = candidates.erase(candidate);
+    }
+
+    // describe every registered variant for use in the error messages
+    for (VariantVector::iterator variant = variants_.begin(); variant != variants_.end(); ++variant) {
+      variant_string += "\n" + variant->toString(method_name_);
+    }
+
+    // if there is not a unique candidate, throw an error
+    if (candidates.size()  > 1) {
+      error(String("Call to method is ambiguous. Valid variants are:")
+        .append(variant_string).append("\nUse named arguments to disambiguate call"));
+    }
+    if (candidates.size() == 0) {
+      error(String("No matching method signatures for given arguments. Valid variants are:").append(variant_string));
+    }
+
+    // Unique candidate!
+    valid_ = candidates[0].name();
+    sortArguments(candidates[0], const_cast<MxArrayVector&>(inputs), outputs);
+    return outputs;
+  }
+};
+
+} // namespace matlab
+
+#endif
--- a/modules/matlab/include/opencv2/matlab/transpose.hpp
+++ b/modules/matlab/include/opencv2/matlab/transpose.hpp
@@ -0,0 +1,141 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this
+//  license. If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote
+//     products derived from this software without specific prior written
+//     permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and any express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular purpose
+// are disclaimed. In no event shall the Intel Corporation or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or business
+// interruption) however caused and on any theory of liability, whether in
+// contract, strict liability, or tort (including negligence or otherwise)
+// arising in any way out of the use of this software, even if advised of the
+// possibility of such damage.
+//
+////////////////////////////////////////////////////////////////////////////////
+#ifndef OPENCV_TRANSPOSE_HPP_
+#define OPENCV_TRANSPOSE_HPP_
+
+// Transpose a small M x N block of src into dst, converting InputScalar to
+// OutputScalar when the values are written out.
+//   M, N - block extent; src is read as src[m + n*lda] for m < M, n < N
+//   lda  - element stride between consecutive n-lines of src
+//   ldb  - element stride between consecutive m-lines of dst
+// The staging cache holds 16 elements and is indexed with a fixed stride of
+// 4, so neither M nor N may exceed 4 (this is not checked here).
+template <typename InputScalar, typename OutputScalar>
+void transposeBlock(const size_t M, const size_t N, const InputScalar* src, size_t lda, OutputScalar* dst, size_t ldb) {
+  InputScalar cache[16];
+  // copy the source into the cache contiguously
+  for (size_t n = 0; n < N; ++n)
+    for (size_t m = 0; m < M; ++m)
+      cache[m+n*4] = src[m+n*lda];
+  // copy the destination out of the cache contiguously
+  for (size_t m = 0; m < M; ++m)
+    for (size_t n = 0; n < N; ++n)
+      dst[n+m*ldb] = cache[m+n*4];
+}
+
+// Fully unrolled 4x4 transpose: reads four lines of four elements from src
+// (lines are lda elements apart) and writes them transposed to dst (lines are
+// ldb elements apart), converting InputScalar to OutputScalar on the way out.
+template <typename InputScalar, typename OutputScalar>
+void transpose4x4(const InputScalar* src, size_t lda, OutputScalar* dst, size_t ldb) {
+  InputScalar cache[16];
+  // copy the source into the cache contiguously
+  cache[0] = src[0];  cache[1] = src[1];  cache[2] = src[2];  cache[3] = src[3];  src+=lda;
+  cache[4] = src[0];  cache[5] = src[1];  cache[6] = src[2];  cache[7] = src[3];  src+=lda;
+  cache[8] = src[0];  cache[9] = src[1];  cache[10] = src[2]; cache[11] = src[3]; src+=lda;
+  cache[12] = src[0]; cache[13] = src[1]; cache[14] = src[2]; cache[15] = src[3]; src+=lda;
+  // copy the destination out of the cache contiguously
+  dst[0] = cache[0];  dst[1] = cache[4];  dst[2] = cache[8];   dst[3] = cache[12]; dst+=ldb;
+  dst[0] = cache[1];  dst[1] = cache[5];  dst[2] = cache[9];   dst[3] = cache[13]; dst+=ldb;
+  dst[0] = cache[2];  dst[1] = cache[6];  dst[2] = cache[10];  dst[3] = cache[14]; dst+=ldb;
+  dst[0] = cache[3];  dst[1] = cache[7];  dst[2] = cache[11];  dst[3] = cache[15]; dst+=ldb;
+}
+
+
+/*
+ * Vanilla copy, transpose and cast
+ *
+ * Reads the M x N matrix a and writes its transpose to b, converting every
+ * element from InputScalar to OutputScalar.
+ *   major - 'R' makes N the contiguous (first) dimension of a, any other
+ *           value makes M the contiguous dimension ('R' presumably stands
+ *           for row-major - confirm against callers)
+ *   lda   - element stride between consecutive lines of a
+ *   ldb   - element stride between consecutive lines of b
+ */
+template <typename InputScalar, typename OutputScalar>
+void gemt(const char major, const size_t M, const size_t N, const InputScalar* a, size_t lda, OutputScalar* b, size_t ldb) {
+
+  // 1x1 transpose is just copy
+  if (M == 1 && N == 1) { *b = *a; return; }
+
+  // get the interior 4x4 blocks, and the extra skirting
+  const size_t Fblock = (major == 'R') ? N/4 : M/4;
+  const size_t Frem   = (major == 'R') ? N%4 : M%4;
+  const size_t Sblock = (major == 'R') ? M/4 : N/4;
+  const size_t Srem   = (major == 'R') ? M%4 : N%4;
+
+  // if less than 4x4, invoke the block transpose immediately
+  if (M < 4 && N < 4) { transposeBlock(Frem, Srem, a, lda, b, ldb); return; }
+
+  // transpose 4x4 blocks
+  const InputScalar* aptr = a;
+  OutputScalar* bptr = b;
+  for (size_t second = 0; second < Sblock; ++second) {
+    // every block spans four lines of the second dimension, so the start of
+    // block-row `second` is 4*second lines into a (and 4*second elements
+    // into a line of b), matching the 4*Sblock offset of the trailing pass
+    aptr = a + 4*second*lda;
+    bptr = b + 4*second;
+    for (size_t first = 0; first < Fblock; ++first) {
+      transposeBlock(4, 4, aptr, lda, bptr, ldb);
+      //transpose4x4(aptr, lda, bptr, ldb);
+      aptr+=4;
+      bptr+=4*ldb;
+    }
+    // transpose trailing blocks on primary dimension
+    transposeBlock(Frem, 4, aptr, lda, bptr, ldb);
+  }
+  // transpose trailing blocks on secondary dimension
+  aptr = a + 4*Sblock*lda;
+  bptr = b + 4*Sblock;
+  for (size_t first = 0; first < Fblock; ++first) {
+    transposeBlock(4, Srem, aptr, lda, bptr, ldb);
+    aptr+=4;
+    bptr+=4*ldb;
+  }
+  // transpose bottom right-hand corner
+  transposeBlock(Frem, Srem, aptr, lda, bptr, ldb);
+}
+
+#ifdef __SSE2__
+/*
+ * SSE2 supported fast copy, transpose and cast
+ */
+#include <emmintrin.h>
+
+// Explicit float -> float specialization using SSE intrinsics: four unaligned
+// 4-float loads, an in-register 4x4 transpose, then four unaligned stores.
+// A full specialization is an ordinary function rather than a template, so it
+// is marked inline to keep this header safe to include from more than one
+// translation unit.
+template <>
+inline void transpose4x4<float, float>(const float* src, size_t lda, float* dst, size_t ldb) {
+  __m128 row0, row1, row2, row3;
+  row0 = _mm_loadu_ps(src);
+  row1 = _mm_loadu_ps(src+lda);
+  row2 = _mm_loadu_ps(src+2*lda);
+  row3 = _mm_loadu_ps(src+3*lda);
+  _MM_TRANSPOSE4_PS(row0, row1, row2, row3);
+  _mm_storeu_ps(dst, row0);
+  _mm_storeu_ps(dst+ldb, row1);
+  _mm_storeu_ps(dst+2*ldb, row2);
+  _mm_storeu_ps(dst+3*ldb, row3);
+}
+
+#endif
+#endif
--- a/modules/matlab/test/CMakeLists.txt
+++ b/modules/matlab/test/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(TEST_PROXY ${CMAKE_CURRENT_BINARY_DIR}/test.proxy)
+file(REMOVE ${TEST_PROXY})
+
+# stage
+# copy the Matlab test sources into the build directory; the proxy file is
+# touched last and serves as the OUTPUT the custom target depends on
+add_custom_command(
+    OUTPUT ${TEST_PROXY}
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/OpenCVTest.m ${CMAKE_CURRENT_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/testsuite.m ${CMAKE_CURRENT_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E touch ${TEST_PROXY}
+    COMMENT "Building Matlab tests"
+)
+
+# targets
+# test.proxy --> opencv_test_matlab (built after ${the_module})
+add_custom_target(opencv_test_matlab ALL DEPENDS ${TEST_PROXY})
+add_dependencies(opencv_test_matlab ${the_module})
+
+# run the matlab test suite
+# the NAME keyword is required for COMMAND and WORKING_DIRECTORY to be parsed
+# as keywords; without it the legacy add_test(<name> <exe> <args>...) form is
+# selected and "COMMAND" itself is taken as the executable. -r expects a
+# statement, so the script is named without its .m extension
+add_test(NAME opencv_test_matlab
+    COMMAND ${MATLAB_BIN} "-nodisplay" "-r" "testsuite"
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
--- a/modules/matlab/test/OpenCVTest.m
+++ b/modules/matlab/test/OpenCVTest.m
@@ -0,0 +1,166 @@
+% Matlab binding test cases
+% Uses Matlab's builtin testing framework
+classdef OpenCVTest < matlab.unittest.TestCase
+
+  methods(Test)
+
+    % -------------------------------------------------------------------------
+    % EXCEPTIONS
+    % Check that errors and exceptions are thrown correctly
+    % -------------------------------------------------------------------------
+
+    % check that std exception is thrown
+    function stdException(testcase)
+      try
+        std_exception();
+        testcase.verifyFail();
+      catch
+        % TODO: Catch more specific exception
+        testcase.verifyTrue(true);
+      end
+    end
+
+    % check that OpenCV exceptions are correctly caught
+    function cvException(testcase)
+      try
+        cv_exception();
+        testcase.verifyFail();
+      catch
+        % TODO: Catch more specific exception
+        testcase.verifyTrue(true);
+      end
+    end
+
+    % check that all exceptions are caught
+    function allException(testcase)
+      try
+        exception();
+        testcase.verifyFail();
+      catch
+        % TODO: Catch more specific exception
+        testcase.verifyTrue(true);
+      end
+    end
+
+    % -------------------------------------------------------------------------
+    % SIZES AND FILLS
+    % Check that matrices are correctly filled and resized
+    % -------------------------------------------------------------------------
+
+    % check that a matrix is correctly filled with random numbers
+    function randomFill(testcase)
+      sz = [7 11];
+      mat = zeros(sz);
+      mat = cv.randn(mat, 0, 1);
+      testcase.verifyEqual(size(mat), sz, 'Matrix should not change size');
+      testcase.verifyNotEqual(mat, zeros(sz), 'Matrix should be nonzero');
+    end
+
+    % check that cv.transpose agrees with Matlab's transpose on a
+    % non-square matrix
+    function transpose(testcase)
+      m = randn(19, 81);
+      mt1 = transpose(m);
+      mt2 = cv.transpose(m);
+      testcase.verifyEqual(size(mt1), size(mt2), 'Matrix transposed to incorrect dimensionality');
+      testcase.verifyLessThan(norm(mt1 - mt2), 1e-8, 'Too much precision lost in tranposition');
+    end
+
+    % multiple return
+    % NOTE(review): eig and cv.eigen may order the eigenvalues differently and
+    % eigenvectors are only defined up to sign; confirm this comparison holds
+    function multipleReturn(testcase)
+      A = randn(10);
+      A = A'*A;
+      [V1, D1] = eig(A); D1 = diag(D1);
+      [~, D2, V2] = cv.eigen(A);
+      testcase.verifyLessThan(norm(V1 - V2), 1e-6, 'Too much precision lost in eigenvectors');
+      testcase.verifyLessThan(norm(D1 - D2), 1e-6, 'Too much precision lost in eigenvalues');
+    end
+
+    % complex output from eigen decomposition (despite the method name, no SVD
+    % is computed here)
+    % NOTE(review): cv::eigen is documented for symmetric input; A is not
+    % symmetric here, so confirm that a complex V2 is really expected
+    function complexOutputSVD(testcase)
+      A = randn(10);
+      [V1, D1] = eig(A);
+      [~, D2, V2] = cv.eigen(A);
+      testcase.verifyTrue(~isreal(V2) && size(V2,3) == 1, 'Output should be complex');
+      testcase.verifyLessThan(norm(V1 - V2), 1e-6, 'Too much precision lost in eigenvectors');
+    end
+
+    % complex output from Fourier Transform
+    function complexOutputFFT(testcase)
+      A = randn(10);
+      F1 = fft2(A);
+      F2 = cv.dft(A, cv.DFT_COMPLEX_OUTPUT);
+      testcase.verifyTrue(~isreal(F2) && size(F2,3) == 1, 'Output should be complex');
+      testcase.verifyLessThan(norm(F1 - F2), 1e-6, 'Too much precision lost in Fourier transform');
+    end
+
+    % -------------------------------------------------------------------------
+    % TYPE CASTS
+    % Check that types are correctly cast
+    % -------------------------------------------------------------------------
+
+    % -------------------------------------------------------------------------
+    % PRECISION
+    % Check that basic operations are performed with sufficient precision
+    % -------------------------------------------------------------------------
+
+    % check that summing elements is within reasonable precision
+    function sumElements(testcase)
+      a = randn(5000);
+      b = sum(a(:));
+      c = cv.sum(a);
+      testcase.verifyLessThan(norm(b - c), 1e-8, 'Matrix reduction with insufficient precision');
+    end
+
+
+    % check that adding two matrices is within reasonable precision
+    function addPrecision(testcase)
+      a = randn(50);
+      b = randn(50);
+      c = a+b;
+      d = cv.add(a, b);
+      testcase.verifyLessThan(norm(c - d), 1e-8, 'Matrices are added with insufficient precision');
+    end
+
+    % check that performing gemm is within reasonable precision
+    function gemmPrecision(testcase)
+      a = randn(10, 50);
+      b = randn(50, 10);
+      c = randn(10, 10);
+      alpha = 2.71828;
+      gamma = 1.61803;
+      d = alpha*a*b + gamma*c;
+      e = cv.gemm(a, b, alpha, c, gamma);
+      testcase.verifyLessThan(norm(d - e), 1e-8, 'Matrices are multiplied with insufficient precision');
+    end
+
+
+    % -------------------------------------------------------------------------
+    % MISCELLANEOUS
+    % Miscellaneous tests
+    % -------------------------------------------------------------------------
+
+    % check that cv::waitKey waits for at least specified time
+    function waitKey(testcase)
+      tic();
+      cv.waitKey(500);
+      elapsed = toc();
+      testcase.verifyGreaterThan(elapsed, 0.5, 'Elapsed time should be at least 0.5 seconds');
+    end
+
+    % check that highgui window can be created and destroyed
+    function createAndDestroyWindow(testcase)
+      try
+        cv.namedWindow('test window');
+      catch
+        testcase.verifyFail('could not create window');
+      end
+
+      try
+        cv.destroyWindow('test window');
+      catch
+        testcase.verifyFail('could not destroy window');
+      end
+      testcase.verifyTrue(true);
+    end
+
+  end
+end
--- a/modules/matlab/test/cv_exception.cpp
+++ b/modules/matlab/test/cv_exception.cpp
@@ -0,0 +1,33 @@
+/*
+ * file:   cv_exception.cpp
+ * author: Hilton Bristow
+ * date:   Wed, 19 Jun 2013 11:15:15
+ *
+ * See LICENCE for full modification and redistribution details.
+ * Copyright 2013 The OpenCV Foundation
+ */
+#include <exception>
+#include <opencv2/core.hpp>
+#include "mex.h"
+
+/*
+ * exception
+ * Gateway routine
+ *   nlhs - number of return arguments
+ *   plhs - pointers to return arguments
+ *   nrhs - number of input arguments
+ *   prhs - pointers to input arguments
+ */
+void mexFunction(int nlhs, mxArray* plhs[],
+                 int nrhs, const mxArray* prhs[]) {
+
+  // deliberately throw a cv::Exception and report its message to Matlab
+  // through mexErrMsgTxt; the gateway arguments are not used.
+  // The exception is caught by const reference since it is only read.
+  try {
+    throw cv::Exception(-1, "OpenCV exception thrown", __func__, __FILE__, __LINE__);
+  } catch(const cv::Exception& e) {
+    mexErrMsgTxt(e.what());
+  } catch(...) {
+    mexErrMsgTxt("Incorrect exception caught!");
+  }
+}
--- a/modules/matlab/test/exception.cpp
+++ b/modules/matlab/test/exception.cpp
@@ -0,0 +1,29 @@
+/*
+ * file:   exception.cpp
+ * author: Hilton Bristow
+ * date:   Wed, 19 Jun 2013 11:15:15
+ *
+ * See LICENCE for full modification and redistribution details.
+ * Copyright 2013 The OpenCV Foundation
+ */
+#include "mex.h"
+
+/*
+ * exception
+ * Gateway routine
+ *   nlhs - number of return arguments
+ *   plhs - pointers to return arguments
+ *   nrhs - number of input arguments
+ *   prhs - pointers to input arguments
+ */
+void mexFunction(int nlhs, mxArray* plhs[],
+                 int nrhs, const mxArray* prhs[]) {
+
+  // throw a value that is not derived from std::exception (an int) and
+  // report it to Matlab from the catch-all handler; the gateway arguments
+  // are not used
+  try {
+    throw 1;
+  } catch(...) {
+    mexErrMsgTxt("Uncaught exception occurred!");
+  }
+}
--- a/modules/matlab/test/help.m
+++ b/modules/matlab/test/help.m
@@ -0,0 +1,15 @@
+function help()
+%CV.HELP display help information for the OpenCV Toolbox
+%
+%   Calling:
+%   >> cv.help();
+%
+%   is equivalent to calling:
+%   >> help cv;
+%
+%   It displays high-level usage information about the OpenCV toolbox
+%   along with resources to find out more information.
+%
+%   See also: cv.buildInformation
+  help('cv');
+end
--- a/modules/matlab/test/std_exception.cpp
+++ b/modules/matlab/test/std_exception.cpp
@@ -0,0 +1,32 @@
+/*
+ * file:   std_exception.cpp
+ * author: Hilton Bristow
+ * date:   Wed, 19 Jun 2013 11:15:15
+ *
+ * See LICENCE for full modification and redistribution details.
+ * Copyright 2013 The OpenCV Foundation
+ */
+#include <exception>
+#include "mex.h"
+
+/*
+ * exception
+ * Gateway routine
+ *   nlhs - number of return arguments
+ *   plhs - pointers to return arguments
+ *   nrhs - number of input arguments
+ *   prhs - pointers to input arguments
+ */
+void mexFunction(int nlhs, mxArray* plhs[],
+                 int nrhs, const mxArray* prhs[]) {
+
+  // deliberately throw a std::exception and report its message to Matlab
+  // through mexErrMsgTxt; the gateway arguments are not used.
+  // The exception is caught by const reference since it is only read.
+  try {
+    throw std::exception();
+  } catch(const std::exception& e) {
+    mexErrMsgTxt(e.what());
+  } catch(...) {
+    mexErrMsgTxt("Incorrect exception caught!");
+  }
+}
--- a/modules/matlab/test/test_compiler.cpp
+++ b/modules/matlab/test/test_compiler.cpp
@@ -0,0 +1,31 @@
+/*
+ * file:   rand.cpp
+ * author: A trusty code generator
+ * date:   Wed, 19 Jun 2013 11:15:15
+ *
+ * This file was autogenerated, do not modify.
+ * See LICENCE for full modification and redistribution details.
+ * Copyright 2013 The OpenCV Foundation
+ */
+#include "mex.h"
+#include <vector>
+
+/*
+ * rand
+ * Gateway routine
+ *   nlhs - number of return arguments
+ *   plhs - pointers to return arguments
+ *   nrhs - number of input arguments
+ *   prhs - pointers to input arguments
+ */
+void mexFunction(int nlhs, mxArray* plhs[],
+                 int nrhs, const mxArray* prhs[]) {
+
+  // call rand() with no arguments and discard the result; any exception is
+  // reported to Matlab through mexErrMsgTxt. The gateway arguments are not
+  // used.
+  try {
+    rand();
+  } catch(...) {
+    mexErrMsgTxt("Uncaught exception occurred in rand");
+  }
+}
--- a/modules/matlab/test/test_generator.hpp
+++ b/modules/matlab/test/test_generator.hpp
@@ -0,0 +1,15 @@
+/*
+ * a rather innocuous-looking function which is actually
+ * part of <cstdlib>, so we can be reasonably sure its
+ * definition will be found
+ */
+#ifndef __OPENCV_MATLAB_TEST_GENERATOR_HPP_
+#define __OPENCV_MATLAB_TEST_GENERATOR_HPP_
+
+namespace cv {
+
+CV_EXPORTS_W int rand( );
+
+};
+
+#endif
--- a/modules/matlab/test/testsuite.m
+++ b/modules/matlab/test/testsuite.m
@@ -0,0 +1,11 @@
+% add the opencv bindings folder
+addpath ..
+
+% run inside try/catch so that Matlab always exits, and report the outcome
+% through the process exit code (0 = all tests passed, 1 = failure or error)
+try
+  %setup the tests
+  opencv_tests = OpenCVTest();
+
+  %run the tests
+  result = run(opencv_tests);
+  status = double(any([result.Failed]));
+catch err
+  disp(getReport(err));
+  status = 1;
+end
+
+% shutdown
+exit(status);
--- a/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
+++ b/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
@@ -47,6 +47,7 @@
 #include "opencv2/core.hpp"
 #include <vector>
 #include <deque>
+#include <string>

 namespace cv
 {
@@ -163,7 +164,8 @@ public:
    local minimum is greater than minProbabilityDiff).

    \param  cb                Callback with the classifier.
-                              if omitted tries to load a default classifier from file trained_classifierNM1.xml
+                              default classifier can be implicitly loaded with function loadClassifierNM1()
+                              from file in samples/cpp/trained_classifierNM1.xml
    \param  thresholdDelta    Threshold step in subsequent thresholds when extracting the component tree
    \param  minArea           The minimum area (% of image size) allowed for retreived ER's
    \param  minArea           The maximum area (% of image size) allowed for retreived ER's
@@ -171,7 +173,7 @@ public:
    \param  nonMaxSuppression Whenever non-maximum suppression is done over the branch probabilities
    \param  minProbability    The minimum probability difference between local maxima and local minima ERs
 */
-CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb = Ptr<ERFilter::Callback>(),
+CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
                                                  int thresholdDelta = 1, float minArea = 0.00025,
                                                  float maxArea = 0.13, float minProbability = 0.4,
                                                  bool nonMaxSuppression = true,
@@ -187,11 +189,52 @@ CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb = P
    additional features: hole area ratio, convex hull ratio, and number of outer inflexion points.

    \param  cb             Callback with the classifier
-                           if omitted tries to load a default classifier from file trained_classifierNM2.xml
+                           default classifier can be implicitly loaded with function loadClassifierNM2()
+                           from file in samples/cpp/trained_classifierNM2.xml
    \param  minProbability The minimum probability P(er|character) allowed for retreived ER's
 */
-CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb = Ptr<ERFilter::Callback>(),
+CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
                                                  float minProbability = 0.3);

+
+/*!
+    Load the 1st stage classifier model from a file so that it can be passed
+    as the callback of createERFilterNM1().
+
+    \param  filename  XML or YAML file with the classifier model
+                      (e.g. trained_classifierNM1.xml)
+    \return pointer to the loaded classifier as an ERFilter::Callback
+*/
+
+CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename);
+
+/*!
+    Load the 2nd stage classifier model from a file so that it can be passed
+    as the callback of createERFilterNM2().
+
+    \param  filename  XML or YAML file with the classifier model
+                      (e.g. trained_classifierNM2.xml)
+    \return pointer to the loaded classifier as an ERFilter::Callback
+*/
+
+CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM2(const std::string& filename);
+
+
+// computeNMChannels operation modes
+enum { ERFILTER_NM_RGBLGrad = 0,  // red, green, blue, lightness and gradient magnitude
+       ERFILTER_NM_IHSGrad  = 1   // intensity, hue, saturation and gradient magnitude
+     };
+
+/*!
+    Compute the different channels to be processed independently in the N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient
+    magnitude channels (Grad) are used in order to obtain high localization recall.
+    This implementation also provides an alternative combination of red (R), green (G), blue (B),
+    lightness (L), and gradient magnitude (Grad).
+
+    \param  _src           Source image. Must be RGB CV_8UC3.
+    \param  _channels      Output vector<Mat> where computed channels are stored.
+    \param  _mode          Mode of operation. Currently the only available options are
+                           ERFILTER_NM_RGBLGrad (by default) and ERFILTER_NM_IHSGrad.
+
+*/
+CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
+
 }
 #endif // _OPENCV_ERFILTER_HPP_
--- a/modules/objdetect/src/erfilter.cpp
+++ b/modules/objdetect/src/erfilter.cpp
@@ -137,7 +137,7 @@ class CV_EXPORTS ERClassifierNM1 : public ERFilter::Callback
 {
 public:
    //Constructor
-    ERClassifierNM1();
+    ERClassifierNM1(const std::string& filename);
    // Destructor
    ~ERClassifierNM1() {};

@@ -153,7 +153,7 @@ class CV_EXPORTS ERClassifierNM2 : public ERFilter::Callback
 {
 public:
    //constructor
-    ERClassifierNM2();
+    ERClassifierNM2(const std::string& filename);
    // Destructor
    ~ERClassifierNM2() {};

@@ -988,24 +988,13 @@ int ERFilterNM::getNumRejected()


 // load default 1st stage classifier if found
-ERClassifierNM1::ERClassifierNM1()
+ERClassifierNM1::ERClassifierNM1(const std::string& filename)
 {

-    if (ifstream("./trained_classifierNM1.xml"))
-    {
-        // The file with default classifier exists
-        boost.load("./trained_classifierNM1.xml", "boost");
-    }
-    else if (ifstream("./training/trained_classifierNM1.xml"))
-    {
-        // The file with default classifier exists
-        boost.load("./training/trained_classifierNM1.xml", "boost");
-    }
+    if (ifstream(filename.c_str()))
+        boost.load( filename.c_str(), "boost" );
    else
-    {
-        // File not found
-        CV_Error(CV_StsBadArg, "Default classifier ./trained_classifierNM1.xml not found!");
-    }
+        CV_Error(CV_StsBadArg, "Default classifier file not found!");
 };

 double ERClassifierNM1::eval(const ERStat& stat)
@@ -1026,24 +1015,12 @@ double ERClassifierNM1::eval(const ERStat& stat)


 // load default 2nd stage classifier if found
-ERClassifierNM2::ERClassifierNM2()
+ERClassifierNM2::ERClassifierNM2(const std::string& filename)
 {
-
-    if (ifstream("./trained_classifierNM2.xml"))
-    {
-        // The file with default classifier exists
-        boost.load("./trained_classifierNM2.xml", "boost");
-    }
-    else if (ifstream("./training/trained_classifierNM2.xml"))
-    {
-        // The file with default classifier exists
-        boost.load("./training/trained_classifierNM2.xml", "boost");
-    }
+    if (ifstream(filename.c_str()))
+        boost.load( filename.c_str(), "boost" );
    else
-    {
-        // File not found
-        CV_Error(CV_StsBadArg, "Default classifier ./trained_classifierNM2.xml not found!");
-    }
+        CV_Error(CV_StsBadArg, "Default classifier file not found!");
 };

 double ERClassifierNM2::eval(const ERStat& stat)
@@ -1079,7 +1056,8 @@ double ERClassifierNM2::eval(const ERStat& stat)
    local minimum is greater than minProbabilityDiff).

    \param  cb                Callback with the classifier.
-                              if omitted tries to load a default classifier from file trained_classifierNM1.xml
+                              default classifier can be implicitly loaded with function loadClassifierNM1()
+                              from file in samples/cpp/trained_classifierNM1.xml
    \param  thresholdDelta    Threshold step in subsequent thresholds when extracting the component tree
    \param  minArea           The minimum area (% of image size) allowed for retreived ER's
    \param  minArea           The maximum area (% of image size) allowed for retreived ER's
@@ -1099,10 +1077,7 @@ Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb, int threshold

    Ptr<ERFilterNM> filter = makePtr<ERFilterNM>();

-    if (cb == NULL)
-        filter->setCallback(makePtr<ERClassifierNM1>());
-    else
-        filter->setCallback(cb);
+    filter->setCallback(cb);

    filter->setThresholdDelta(thresholdDelta);
    filter->setMinArea(minArea);
@@ -1123,7 +1098,8 @@ Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb, int threshold
    additional features: hole area ratio, convex hull ratio, and number of outer inflexion points.

    \param  cb             Callback with the classifier
-                           if omitted tries to load a default classifier from file trained_classifierNM2.xml
+                           default classifier can be implicitly loaded with function loadClassifierNM2()
+                           from file in samples/cpp/trained_classifierNM2.xml
    \param  minProbability The minimum probability P(er|character) allowed for retreived ER's
 */
 Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb, float minProbability)
@@ -1133,12 +1109,144 @@ Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb, float minProb

    Ptr<ERFilterNM> filter = makePtr<ERFilterNM>();

-    if (cb == NULL)
-        filter->setCallback(makePtr<ERClassifierNM2>());
-    else
-        filter->setCallback(cb);
+    filter->setCallback(cb);

    filter->setMinProbability(minProbability);
    return (Ptr<ERFilter>)filter;
 }
+
+/*!
+    Load the 1st stage classifier model from a file so that it can be passed
+    as the callback of createERFilterNM1().
+
+    \param  filename  XML or YAML file with the classifier model
+                      (e.g. trained_classifierNM1.xml)
+    \return pointer to a newly created ERClassifierNM1, as an ERFilter::Callback
+*/
+Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename)
+
+{
+    return makePtr<ERClassifierNM1>(filename);
+}
+
+/*!
+    Load the 2nd stage classifier model from a file so that it can be passed
+    as the callback of createERFilterNM2().
+
+    \param  filename  XML or YAML file with the classifier model
+                      (e.g. trained_classifierNM2.xml)
+    \return pointer to a newly created ERClassifierNM2, as an ERFilter::Callback
+*/
+Ptr<ERFilter::Callback> loadClassifierNM2(const std::string& filename)
+{
+    return makePtr<ERClassifierNM2>(filename);
+}
+
+
+/* ------------------------------------------------------------------------------------*/
+/* -------------------------------- Compute Channels NM -------------------------------*/
+/* ------------------------------------------------------------------------------------*/
+
+
+// Forward declaration of the helper defined right below.
+void  get_gradient_magnitude(Mat& _grey_img, Mat& _gradient_magnitude);
+
+// Writes the per-pixel gradient magnitude of _grey_img to _gradient_magnitude.
+// The image is converted to float, filtered with a horizontal and a vertical
+// [-1 0 1] kernel, and the two responses are combined with magnitude().
+void get_gradient_magnitude(Mat& _grey_img, Mat& _gradient_magnitude)
+{
+    // floating point copy of the input; both filters read from it
+    Mat grey_f = Mat_<float>(_grey_img);
+
+    // horizontal difference (1x3 kernel)
+    Mat diff_x = (Mat_<float>(1,3) << -1,0,1);
+    Mat dx;
+    filter2D(grey_f, dx, -1, diff_x, Point(-1,-1), 0, BORDER_DEFAULT);
+
+    // vertical difference (3x1 kernel)
+    Mat diff_y = (Mat_<float>(3,1) << -1,0,1);
+    Mat dy;
+    filter2D(grey_f, dy, -1, diff_y, Point(-1,-1), 0, BORDER_DEFAULT);
+
+    magnitude(dx, dy, _gradient_magnitude);
+}
+
+
+/*!
+    Compute the different channels to be processed independently in the N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient
+    magnitude channels (Grad) are used in order to obtain high localization recall.
+    This implementation also provides the alternative combination of red (R), green (G), blue (B),
+    lightness (L), and gradient magnitude (Grad).
+
+    An empty source releases _channels and returns without further checks.
+
+    \param  _src           Source image. Must be RGB CV_8UC3.
+    \param  _channels      Output vector<Mat> where computed channels are stored.
+    \param  _mode          Mode of operation. Currently the only available options are
+                           ERFILTER_NM_RGBLGrad and ERFILTER_NM_IHSGrad.
+
+*/
+void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode)
+{
+
+    CV_Assert( ( _mode == ERFILTER_NM_RGBLGrad ) || ( _mode == ERFILTER_NM_IHSGrad ) );
+
+    Mat src = _src.getMat();
+    if( src.empty() )
+    {
+        _channels.release();
+        return;
+    }
+
+    // assert RGB image
+    CV_Assert(src.type() == CV_8UC3);
+
+    if (_mode == ERFILTER_NM_IHSGrad)
+    {
+        // four outputs: the three planes of the HSV image, then the gradient
+        _channels.create( 4, 1, src.depth());
+
+        Mat hsv;
+        cvtColor(src, hsv, COLOR_RGB2HSV);
+        vector<Mat> channelsHSV;
+        split(hsv, channelsHSV);
+
+        // outputs 0..2 receive the HSV planes in the order split() returns them
+        for (int i = 0; i < src.channels(); i++)
+        {
+            _channels.create(src.rows, src.cols, CV_8UC1, i);
+            Mat channel = _channels.getMat(i);
+            channelsHSV.at(i).copyTo(channel);
+        }
+
+        // output 3: gradient magnitude of the grey image, converted to 8 bit
+        Mat grey;
+        cvtColor(src, grey, COLOR_RGB2GRAY);
+        Mat gradient_magnitude = Mat_<float>(grey.size());
+        get_gradient_magnitude( grey, gradient_magnitude);
+        gradient_magnitude.convertTo(gradient_magnitude, CV_8UC1);
+
+        _channels.create(src.rows, src.cols, CV_8UC1, 3);
+        Mat channelGrad = _channels.getMat(3);
+        gradient_magnitude.copyTo(channelGrad);
+
+    } else if (_mode == ERFILTER_NM_RGBLGrad) {
+
+        // five outputs: the three planes of src, lightness, then the gradient
+        _channels.create( 5, 1, src.depth());
+
+        // outputs 0..2 receive the planes of src in the order split() returns them
+        vector<Mat> channelsRGB;
+        split(src, channelsRGB);
+        for (int i = 0; i < src.channels(); i++)
+        {
+            _channels.create(src.rows, src.cols, CV_8UC1, i);
+            Mat channel = _channels.getMat(i);
+            channelsRGB.at(i).copyTo(channel);
+        }
+
+        // output 3: the second plane of the HLS image (lightness)
+        Mat hls;
+        cvtColor(src, hls, COLOR_RGB2HLS);
+        vector<Mat> channelsHLS;
+        split(hls, channelsHLS);
+
+        _channels.create(src.rows, src.cols, CV_8UC1, 3);
+        Mat channelL = _channels.getMat(3);
+        channelsHLS.at(1).copyTo(channelL);
+
+        // output 4: gradient magnitude of the grey image, converted to 8 bit
+        Mat grey;
+        cvtColor(src, grey, COLOR_RGB2GRAY);
+        Mat gradient_magnitude = Mat_<float>(grey.size());
+        get_gradient_magnitude( grey, gradient_magnitude);
+        gradient_magnitude.convertTo(gradient_magnitude, CV_8UC1);
+
+        _channels.create(src.rows, src.cols, CV_8UC1, 4);
+        Mat channelGrad = _channels.getMat(4);
+        gradient_magnitude.copyTo(channelGrad);
+    }
+}
 }
--- a/modules/ocl/doc/image_filtering.rst
+++ b/modules/ocl/doc/image_filtering.rst
@@ -162,7 +162,7 @@ ocl::bilateralFilter
 --------------------
 Returns void

-.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT)
+.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT)

    :param src: The source image

--- a/modules/ocl/include/opencv2/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl.hpp
@@ -519,7 +519,15 @@ namespace cv

        //! bilateralFilter
        // supports 8UC1 8UC4
-        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
+        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
+
+        //! Applies an adaptive bilateral filter to the input image.
+        //  This is not truly a bilateral filter: instead of using fixed, user-provided parameters,
+        //  the function calculates a constant for each window based on the local standard deviation
+        //  and uses this constant to do the filtering.
+        //  Supports 8UC1 and 8UC3.
+        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
+
        //! computes exponent of each matrix element (b = e**a)
        // supports only CV_32FC1 type
        CV_EXPORTS void exp(const oclMat &a, oclMat &b);
@@ -1797,6 +1805,155 @@ namespace cv
        //    keys   = {1,    2,   3}   (CV_8UC1)
        //    values = {6,2, 10,5, 4,3} (CV_8UC2)
        void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
+        /*! Abstract base class for the OpenCL background subtractors MOG and MOG2. */
+        class CV_EXPORTS BackgroundSubtractor
+        {
+        public:
+            //! the virtual destructor
+            virtual ~BackgroundSubtractor();
+            //! the update operator: takes the next video frame and returns the current foreground mask (8-bit binary image) in fgmask.
+            virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
+            // NOTE: unlike the MOG and MOG2 overrides, the base operator() declares no default for learningRate.
+            //! computes a background image and stores it in backgroundImage; pure virtual, so derived classes must implement it
+            virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
+        };
+        /*!
+        Gaussian Mixture-based Background/Foreground Segmentation Algorithm
+
+        The class implements the following algorithm:
+        "An improved adaptive background mixture model for real-time tracking with shadow detection"
+        P. KaewTraKulPong and R. Bowden,
+        Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001.
+        http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+        */
+        class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor; a non-positive nmixtures selects the default (5) and the value is capped at 8
+            MOG(int nmixtures = -1);
+
+            //! re-initialization method: (re)creates and zeroes the model buffers; frameType must be CV_8UC1, CV_8UC3 or CV_8UC4
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator; on the first frame or with a negative learningRate, 1 / min(frames seen, history) is used instead
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
+
+            //! computes a background image which is the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers and resets the stored frame size, frame type and frame counter
+            void release();
+
+            int history;            // constructor default: 200; caps the frame count used for the automatic learning rate
+            float varThreshold;     // constructor default: 2.5 * 2.5
+            float backgroundRatio;  // constructor default: 0.7
+            float noiseSigma;       // constructor default: 30 * 0.5
+
+        private:
+            int nmixtures_;         // number of gaussian mixtures per pixel (at most 8)
+
+            Size frameSize_;        // frame size given to the last initialize()
+            int frameType_;         // frame type given to the last initialize()
+            int nframes_;           // frames processed since the last initialize()
+
+            oclMat weight_;         // mixture weights, CV_32FC1, (height * nmixtures_) rows
+            oclMat sortKey_;        // mixture sort keys (w/sum_of_variances), CV_32FC1, (height * nmixtures_) rows
+            oclMat mean_;           // mixture means, one float per frame channel
+            oclMat var_;            // mixture variances, one float per frame channel
+        };
+
+        /*!
+        The class implements the following algorithm:
+        "Improved adaptive Gaussian mixture model for background subtraction"
+        Z.Zivkovic
+        International Conference Pattern Recognition, UK, August, 2004.
+        http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+        */
+        class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
+        {
+        public:
+            //! the default constructor
+            MOG2(int nmixtures = -1);
+
+            //! re-initialization method
+            void initialize(Size frameSize, int frameType);
+
+            //! the update operator
+            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
+
+            //! computes a background image which is the mean of all background gaussians
+            void getBackgroundImage(oclMat& backgroundImage) const;
+
+            //! releases all inner buffers
+            void release();
+
+            // parameters
+            // you should call initialize after changing any of the parameters
+            // (each explanatory comment below FOLLOWS the field it describes)
+            int history;
+
+            //! NOTE(review): the next line presumably describes the nmixtures constructor argument, not varThreshold:
+            //! it is the maximum allowed number of mixture components; the actual number is determined dynamically per pixel.
+            float varThreshold;
+            // threshold on the squared Mahalanobis distance to decide if a sample is well described
+            // by the background model or not. Related to Cthr from the paper.
+            // This does not influence the update of the background. A typical value could be 4 sigma,
+            // that is varThreshold=4*4=16. Corresponds to Tb in the paper.
+
+            /////////////////////////
+            // less important parameters - things you might change, but be careful
+            ////////////////////////
+
+            float backgroundRatio;
+            // corresponds to fTB=1-cf from the paper
+            // TB - threshold when the component becomes significant enough to be included into
+            // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9.
+            // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+            // it is considered background (NOTE(review): the original text said "foreground" - confirm)
+            // float noiseSigma;
+            float varThresholdGen;
+
+            // corresponds to Tg - threshold on the squared Mahalanobis distance to decide
+            // when a sample is close to the existing components. If it is not close
+            // to any, a new component will be generated. I use 3 sigma => Tg=3*3=9.
+            // A smaller Tg leads to more generated components; a higher Tg might
+            // lead to a small number of components, but they can grow too large.
+            float fVarInit;
+            float fVarMin;
+            float fVarMax;
+
+            // initial variance for the newly generated components.
+            // It will influence the speed of adaptation. A good guess should be made.
+            // A simple way is to estimate the typical standard deviation from the images.
+            // I used here 10 as a reasonable value.
+            // min and max can be used to further control the variance
+            float fCT; //CT - complexity reduction prior
+            // this is related to the number of samples needed to accept that a component
+            // actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+            // the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+            //shadow detection parameters
+            bool bShadowDetection; //default 1 - do shadow detection
+            unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+            float fTau;
+            // Tau - shadow threshold. The shadow is detected if the pixel is a darker
+            // version of the background. Tau is a threshold on how much darker the shadow can be.
+            // Tau= 0.5 means that if a pixel is more than 2 times darker then it is not a shadow.
+            // See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+        private:
+            int nmixtures_;
+
+            Size frameSize_;
+            int frameType_;
+            int nframes_;
+
+            oclMat weight_;
+            oclMat variance_;
+            oclMat mean_;
+
+            oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
+        };
    }
 }
 #if defined _MSC_VER && _MSC_VER >= 1200
--- a/modules/ocl/perf/perf_bgfg.cpp
+++ b/modules/ocl/perf/perf_bgfg.cpp
@@ -0,0 +1,282 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "perf_precomp.hpp"
+using namespace perf;
+using namespace std;
+using namespace cv::ocl;
+using namespace cv;
+using std::tr1::tuple;
+using std::tr1::get;
+#if defined(HAVE_XINE)         || \
+    defined(HAVE_GSTREAMER)    || \
+    defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_AVFOUNDATION) || \
+    defined(HAVE_FFMPEG)       || \
+    defined(WIN32)
+
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
+#else
+#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
+#endif
+
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output)
+{
+    // Convert every input frame to grayscale; output is indexed directly, so it must already hold input.size() elements.
+    const int frameCount = (int)(input.size());
+    for (int idx = 0; idx < frameCount; ++idx)
+        cvtColor(input[idx], output[idx], COLOR_RGB2GRAY);
+}
+// Reads frame_buffer.size() frames from cap into frame_buffer (host side); cn == 1 stores them converted to grayscale.
+static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer)
+{
+    cv::Mat frame;
+    std::vector<Mat> frame_buffer_init;
+    int nFrame = (int)frame_buffer.size();
+    for(int i = 0; i < nFrame; i++)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+        frame_buffer_init.push_back(frame.clone()); // deep copy: a Mat copy shares pixels, and the next read may reuse frame's buffer
+    }
+    // Single-channel runs get grayscale frames; otherwise the frames are kept as decoded.
+    if(cn == 1)
+        cvtFrameFmt(frame_buffer_init, frame_buffer);
+    else
+        frame_buffer = frame_buffer_init;
+}
+// Copies each host frame into an oclMat and appends it to frame_buffer_ocl.
+static void prepareData(vector<Mat>& frame_buffer, vector<oclMat>& frame_buffer_ocl)
+{
+    for (vector<Mat>::const_iterator it = frame_buffer.begin(); it != frame_buffer.end(); ++it)
+        frame_buffer_ocl.push_back(cv::ocl::oclMat(*it));
+}
+#endif
+///////////// MOG ////////////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef tuple<string, int, double> VideoMOGParamType;
+typedef TestBaseWithParam<VideoMOGParamType> VideoMOGFixture;
+
+PERF_TEST_P(VideoMOGFixture, MOG,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(1, 3),
+            ::testing::Values(0.0, 0.01)))
+{
+    VideoMOGParamType params = GetParam();
+    // Times nFrame successive MOG updates, on the CPU or through OpenCL depending on the selected implementation.
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    const float learningRate = static_cast<float>(get<2>(params));
+
+    const int nFrame = 5;
+
+    // Host frames are read once up front; the oclMat copies are made only in the OpenCL branch.
+    std::vector<Mat> frame_buffer(nFrame);
+    std::vector<oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    prepareData(cap, cn, frame_buffer);
+
+    cv::Mat foreground;
+    cv::ocl::oclMat foreground_d;
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG> mog = createBackgroundSubtractorMOG();
+            foreground.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog->apply(frame_buffer[i], foreground, learningRate);
+            }
+        }
+        SANITY_CHECK(foreground);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG d_mog;
+            foreground_d.release();
+            for (int i = 0; i < nFrame; ++i)
+            {
+                d_mog(frame_buffer_ocl[i], foreground_d, learningRate);
+            }
+        }
+        foreground_d.download(foreground);
+        SANITY_CHECK(foreground);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+
+///////////// MOG2 ////////////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef tuple<string, int> VideoMOG2ParamType;
+typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture;
+
+PERF_TEST_P(VideoMOG2Fixture, MOG2,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(1, 3)))
+{
+    VideoMOG2ParamType params = GetParam();
+    // Times nFrame successive MOG2 updates, on the CPU or through OpenCL depending on the selected implementation.
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    const int nFrame = 5;
+
+    std::vector<cv::Mat> frame_buffer(nFrame);
+    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+    prepareData(cap, cn, frame_buffer);
+    cv::Mat foreground;
+    cv::ocl::oclMat foreground_d;
+
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
+            mog2->set("detectShadows", false);
+            foreground.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog2->apply(frame_buffer[i], foreground);
+            }
+        }
+        SANITY_CHECK(foreground);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG2 d_mog2;
+            d_mog2.bShadowDetection = false; // same setting as the CPU branch, which disables shadow detection
+            foreground_d.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                d_mog2(frame_buffer_ocl[i], foreground_d);
+            }
+        }
+        foreground_d.download(foreground);
+        SANITY_CHECK(foreground);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
+
+///////////// MOG2_GetBackgroundImage //////////////////
+#if BUILD_WITH_VIDEO_INPUT_SUPPORT
+
+typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage;
+
+PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
+            ::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+            ::testing::Values(3)))
+{
+    VideoMOG2ParamType params = GetParam();
+    // Times nFrame MOG2 updates followed by one getBackgroundImage() call; the sanity check covers the background image.
+    const string inputFile = perf::TestBase::getDataPath(get<0>(params));
+    const int cn = get<1>(params);
+    const int nFrame = 5;
+
+    std::vector<cv::Mat> frame_buffer(nFrame);
+    std::vector<cv::ocl::oclMat> frame_buffer_ocl;
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+    prepareData(cap, cn, frame_buffer);
+
+    cv::Mat foreground;
+    cv::Mat background;
+    cv::ocl::oclMat foreground_d;
+    cv::ocl::oclMat background_d;
+
+    if(RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE()
+        {
+            cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
+            mog2->set("detectShadows", false);
+            foreground.release();
+            background.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                mog2->apply(frame_buffer[i], foreground);
+            }
+            mog2->getBackgroundImage(background);
+        }
+        SANITY_CHECK(background);
+    }else if(RUN_OCL_IMPL)
+    {
+        prepareData(frame_buffer, frame_buffer_ocl);
+        CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
+        OCL_TEST_CYCLE()
+        {
+            cv::ocl::MOG2 d_mog2;
+            d_mog2.bShadowDetection = false; // same setting as the CPU branch, which disables shadow detection
+            foreground_d.release();
+            background_d.release();
+            for (int i = 0; i < nFrame; i++)
+            {
+                d_mog2(frame_buffer_ocl[i], foreground_d);
+            }
+            d_mog2.getBackgroundImage(background_d);
+        }
+        background_d.download(background);
+        SANITY_CHECK(background);
+    }else
+        OCL_PERF_ELSE
+}
+#endif
--- a/modules/ocl/perf/perf_fft.cpp
+++ b/modules/ocl/perf/perf_fft.cpp
@@ -43,6 +43,7 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
+
 #include "perf_precomp.hpp"

 using namespace perf;
@@ -51,7 +52,9 @@ using namespace perf;

 typedef TestBaseWithParam<Size> dftFixture;

-PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not implemented
+#ifdef HAVE_CLAMDFFT
+
+PERF_TEST_P(dftFixture, dft, OCL_TYPICAL_MAT_SIZES)
 {
    const Size srcSize = GetParam();

@@ -70,7 +73,7 @@ PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not impleme

        oclDst.download(dst);

-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 1.5);
    }
    else if (RUN_PLAIN_IMPL)
    {
@@ -81,3 +84,5 @@ PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not impleme
    else
        OCL_PERF_ELSE
 }
+
+#endif
--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@@ -321,3 +321,82 @@ PERF_TEST_P(filter2DFixture, filter2D,
    else
        OCL_PERF_ELSE
 }
+
+///////////// Bilateral////////////////////////
+
+typedef Size_MatType BilateralFixture;
+
+PERF_TEST_P(BilateralFixture, Bilateral,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), d = 7;
+    double sigmacolor = 50.0, sigmaspace = 50.0;
+    // Compares cv::ocl::bilateralFilter with cv::bilateralFilter using the same d and sigma values.
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+    // NOTE(review): presumably extends the time budget for the largest 3-channel case - confirm declare.time() units.
+    if (srcSize == OCL_SIZE_4000 && type == CV_8UC3)
+        declare.time(8);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::bilateralFilter(oclSrc, oclDst, d, sigmacolor, sigmaspace);
+        // Bring the result back to the host so it can be sanity-checked.
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::bilateralFilter(src, dst, d, sigmacolor, sigmaspace);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// adaptiveBilateral////////////////////////
+
+typedef Size_MatType adaptiveBilateralFixture;
+
+PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+    double sigmaspace = 10.0;
+    Size ksize(9,9);
+    // Compares cv::ocl::adaptiveBilateralFilter with cv::adaptiveBilateralFilter using the same ksize and sigmaspace.
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+    // NOTE(review): presumably extends the time budget for the largest size - confirm declare.time() units.
+    if (srcSize == OCL_SIZE_4000)
+        declare.time(15);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::adaptiveBilateralFilter(oclSrc, oclDst, ksize, sigmaspace);
+        // Bring the result back to the host so it can be sanity-checked.
+        oclDst.download(dst);
+        // NOTE(review): this branch passes a tolerance of 1. while the plain branch below passes none - confirm this is intended.
+        SANITY_CHECK(dst, 1.);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::adaptiveBilateralFilter(src, dst, ksize, sigmaspace);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
--- a/modules/ocl/perf/perf_gemm.cpp
+++ b/modules/ocl/perf/perf_gemm.cpp
@@ -51,8 +51,9 @@ using namespace perf;

 typedef TestBaseWithParam<Size> gemmFixture;

-PERF_TEST_P(gemmFixture, DISABLED_gemm,
-            ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000)) // TODO not implemented
+#ifdef HAVE_CLAMDBLAS
+
+PERF_TEST_P(gemmFixture, gemm, ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000))
 {
    const Size srcSize = GetParam();

@@ -72,14 +73,16 @@ PERF_TEST_P(gemmFixture, DISABLED_gemm,

        oclDst.download(dst);

-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 0.01);
    }
    else if (RUN_PLAIN_IMPL)
    {
        TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst);

-        SANITY_CHECK(dst);
+        SANITY_CHECK(dst, 0.01);
    }
    else
        OCL_PERF_ELSE
 }
+
+#endif
--- a/modules/ocl/perf/perf_precomp.hpp
+++ b/modules/ocl/perf/perf_precomp.hpp
@@ -67,6 +67,7 @@
 #include <vector>
 #include <numeric>

+#include "cvconfig.h"
 #include "opencv2/core.hpp"
 #include "opencv2/core/utility.hpp"
 #include "opencv2/imgproc.hpp"
@@ -102,7 +103,7 @@ using namespace cv;

 #ifdef HAVE_OPENCV_GPU
 #define OCL_PERF_ELSE               \
-        if (RUN_GPU_IMPL)          \
+        if (RUN_GPU_IMPL)           \
            CV_TEST_FAIL_NO_IMPL(); \
        else                        \
            CV_TEST_FAIL_NO_IMPL();
--- a/modules/ocl/src/bgfg_mog.cpp
+++ b/modules/ocl/src/bgfg_mog.cpp
@@ -0,0 +1,638 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+using namespace cv;
+using namespace cv::ocl;
+namespace cv
+{
+    namespace ocl
+    {
+        extern const char* bgfg_mog; // defined elsewhere; its address is passed to openCLExecuteKernel below
+        // Constant block whose fields mirror the parameters of device::mog::loadConstants (declared below).
+        typedef struct _contant_struct
+        {
+            cl_float c_Tb;
+            cl_float c_TB;
+            cl_float c_Tg;
+            cl_float c_varInit;
+            cl_float c_varMin;
+            cl_float c_varMax;
+            cl_float c_tau;
+            cl_uchar c_shadowVal;
+        }contant_struct;
+        // File-level state shared by every subtractor instance in this translation unit.
+        cl_mem cl_constants = NULL; // OpenCL buffer handle; released in MOG::release()
+        float c_TB;
+    }
+}
+
+#if defined _MSC_VER
+#define snprintf sprintf_s
+#endif
+
+namespace cv { namespace ocl { namespace device
+{
+    namespace mog // forward declarations of the helpers called by the subtractor classes
+    {
+        void mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+            int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma);
+        // Called by MOG::getBackgroundImage().
+        void getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio);
+        // The parameter list matches the fields of contant_struct, in the same order.
+        void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau,
+                            unsigned char shadowVal);
+        // NOTE(review): presumably the MOG2 counterparts of the two MOG helpers above - confirm against their definitions.
+        void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean,
+                      float alphaT, float prune, bool detectShadows, int nmixtures);
+
+        void getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures);
+    }
+}}}
+
+namespace mog // default parameter values applied by the cv::ocl::MOG constructor
+{
+    const int defaultNMixtures = 5;                 // used when the constructor receives nmixtures <= 0
+    const int defaultHistory = 200;                 // initial value of MOG::history
+    const float defaultBackgroundRatio = 0.7f;      // initial value of MOG::backgroundRatio
+    const float defaultVarThreshold = 2.5f * 2.5f;  // initial value of MOG::varThreshold
+    const float defaultNoiseSigma = 30.0f * 0.5f;   // initial value of MOG::noiseSigma
+    const float defaultInitialWeight = 0.05f;
+}
+void cv::ocl::BackgroundSubtractor::operator()(const oclMat&, oclMat&, float)
+{
+    // Intentionally empty: the base-class update ignores all of its arguments.
+}
+cv::ocl::BackgroundSubtractor::~BackgroundSubtractor()
+{
+    // Intentionally empty: out-of-line definition of the virtual destructor.
+}
+
+cv::ocl::MOG::MOG(int nmixtures) :
+frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+    nmixtures_ = std::min(nmixtures > 0 ? nmixtures : mog::defaultNMixtures, 8); // non-positive -> default; capped at 8
+    history = mog::defaultHistory;                  // the public tuning fields start from the mog:: defaults
+    varThreshold = mog::defaultVarThreshold;
+    backgroundRatio = mog::defaultBackgroundRatio;
+    noiseSigma = mog::defaultNoiseSigma;
+}
+
+void cv::ocl::MOG::initialize(cv::Size frameSize, int frameType)
+{
+    // Only 8-bit frames with 1, 3 or 4 channels are accepted.
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+
+    // The model keeps, for each gaussian mixture of each pixel, the sort key
+    // (w/sum_of_variances), the weight (w), the mean (one value per channel) and the
+    // diagonal covariance (one value per channel); each buffer has height * nmixtures_ rows.
+    const int modelRows = frameSize_.height * nmixtures_;
+    const int modelCols = frameSize_.width;
+    const int perChannelType = CV_32FC(CV_MAT_CN(frameType));
+
+    weight_.create(modelRows, modelCols, CV_32FC1);
+    sortKey_.create(modelRows, modelCols, CV_32FC1);
+    mean_.create(modelRows, modelCols, perChannelType);
+    var_.create(modelRows, modelCols, perChannelType);
+    // Start from an all-zero model and restart the frame counter.
+    const cv::Scalar zeros = cv::Scalar::all(0);
+    weight_.setTo(zeros);
+    sortKey_.setTo(zeros);
+    mean_.setTo(zeros);
+    var_.setTo(zeros);
+    nframes_ = 0;
+}
+
+void cv::ocl::MOG::operator()(const cv::ocl::oclMat& frame, cv::ocl::oclMat& fgmask, float learningRate)
+{
+    using namespace cv::ocl::device::mog;
+    // Passes one 8-bit frame, the model buffers and fgmask (created here as CV_8UC1) to mog_ocl.
+    CV_Assert(frame.depth() == CV_8U);
+
+    int ch = frame.oclchannels();
+    int work_ch = ch;
+    // (Re)build the model on the first frame, when learningRate >= 1, or when the frame size or channel count changed.
+    if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+    // On the first frame, or with a negative learningRate, fall back to 1 / min(frames seen, history).
+    ++nframes_;
+    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    mog_ocl(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
+        varThreshold, learningRate, backgroundRatio, noiseSigma);
+}
+
+void cv::ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const
+{
+    // Allocate the output with the frame size and type stored by initialize().
+    backgroundImage.create(frameSize_, frameType_);
+    const int channelCount = backgroundImage.oclchannels();
+    cv::ocl::device::mog::getBackgroundImage_ocl(channelCount, weight_, mean_, backgroundImage,
+                                                 nmixtures_, backgroundRatio);
+}
+
+void cv::ocl::MOG::release()
+{
+    // Reset the bookkeeping (nframes_ == 0 makes the next operator() call re-initialize) and free the model buffers.
+    frameSize_ = Size(0, 0);
+    frameType_ = 0;
+    nframes_ = 0;
+    weight_.release();
+    sortKey_.release();
+    mean_.release();
+    var_.release();
+    if (cl_constants != NULL) { clReleaseMemObject(cl_constants); cl_constants = NULL; } // global handle: release once, then clear it
+}
+
+static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float backgroundRatio)
+{
+    Context* clCxt = Context::getContext();
+    // Collects the arguments for "mog_withoutLearning_kernel" and runs it once for the given frame.
+    size_t local_thread[] = {32, 8, 1};                   // requested work-group shape
+    size_t global_thread[] = {frame.cols, frame.rows, 1}; // one work-item per pixel
+    // Row strides expressed in elements rather than bytes.
+    int frame_step = (int)(frame.step/frame.elemSize());
+    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
+    int weight_step = (int)(weight.step/weight.elemSize());
+    int mean_step = (int)(mean.step/mean.elemSize());
+    int var_step = (int)(var.step/var.elemSize());
+    // Split each byte offset into a row index (y) and an element index within that row (x).
+    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
+    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset/frame.step);
+    int frame_offset_x = (int)(frame.offset%frame.step);
+    frame_offset_x = frame_offset_x/(int)frame.elemSize();
+    // Build options: CN1 is defined only for single-channel input; NMIXTURES carries the mixture count.
+    char build_option[50];
+    if(cn == 1)
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }
+    // Arguments are pushed in this order: buffers, frame dimensions, strides, thresholds, offsets.
+    String kernel_name = "mog_withoutLearning_kernel";
+    std::vector<std::pair<size_t, const void*> > args;
+
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
+
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
+    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
+
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+
+// Runs "mog_withLearning_kernel" (bgfg_mog program) with a global size of frame.cols x frame.rows.
+// The kernel is handed the frame, the weight / sortKey / mean / var model buffers and a
+// temporary CV_32SC1 mask; afterwards that mask is converted to CV_8U and copied into
+// fgmask_raw. Build options: -D NMIXTURES=<nmixtures>, plus -D CN1 when cn == 1.
+static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask_raw, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar)
+{
+    typedef std::pair<size_t, const void*> KernelArg;
+
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    // Temporary mask the kernel writes into (the kernel declares this parameter as int*).
+    oclMat fgmask(fgmask_raw.size(), CV_32SC1);
+
+    // Row strides expressed in elements: byte step divided by element size.
+    int frame_step = (int)(frame.step / frame.elemSize());
+    int fgmask_step = (int)(fgmask.step / fgmask.elemSize());
+    int weight_step = (int)(weight.step / weight.elemSize());
+    int sortKey_step = (int)(sortKey.step / sortKey.elemSize());
+    int mean_step = (int)(mean.step / mean.elemSize());
+    int var_step = (int)(var.step / var.elemSize());
+
+    // Split each byte offset into a row index and a column index counted in elements.
+    int fgmask_offset_y = (int)(fgmask.offset / fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset % fgmask.step) / (int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset / frame.step);
+    int frame_offset_x = (int)(frame.offset % frame.step) / (int)frame.elemSize();
+
+    char build_option[50];
+    if(cn != 1)
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }
+
+    String kernel_name = "mog_withLearning_kernel";
+    std::vector<KernelArg> args;
+
+    // The sequence below follows the parameter list of mog_withLearning_kernel.
+    // Buffers
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&sortKey.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&var.data));
+
+    // Frame dimensions
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame.cols));
+
+    // Element strides
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&sortKey_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&var_step));
+
+    // Algorithm parameters
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&varThreshold));
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&backgroundRatio));
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&learningRate));
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&minVar));
+
+    // Element offsets of the mask and of the frame
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&fgmask_offset_y));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame_offset_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+
+    // Narrow the int mask to 8 bits in place, then copy it into the caller's mask.
+    fgmask.convertTo(fgmask, CV_8U);
+    fgmask.copyTo(fgmask_raw);
+}
+
+// Entry point for one MOG step. A learningRate that is not greater than zero selects the
+// kernel without learning; otherwise the learning kernel runs with minVar = noiseSigma^2.
+void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
+    int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma)
+{
+    if(!(learningRate > 0.0f))
+    {
+        mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio);
+        return;
+    }
+
+    const float minVar = noiseSigma * noiseSigma;
+    mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures,
+                     varThreshold, backgroundRatio, learningRate, minVar);
+}
+
+// Runs "getBackgroundImage_kernel" (bgfg_mog program) with a global size of dst.cols x dst.rows.
+// The kernel receives the weight and mean buffers, the destination image, dst's dimensions,
+// the three element strides and backgroundRatio.
+// Build options: -D NMIXTURES=<nmixtures>, plus -D CN1 when cn == 1.
+void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio)
+{
+    typedef std::pair<size_t, const void*> KernelArg;
+
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {dst.cols, dst.rows, 1};
+
+    // Row strides expressed in elements: byte step divided by element size.
+    int weight_step = (int)(weight.step / weight.elemSize());
+    int mean_step = (int)(mean.step / mean.elemSize());
+    int dst_step = (int)(dst.step / dst.elemSize());
+
+    char build_option[50];
+    if(cn != 1)
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }
+
+    String kernel_name = "getBackgroundImage_kernel";
+    std::vector<KernelArg> args;
+
+    // Argument order is kept exactly as before; presumably it mirrors the kernel signature.
+    // Buffers
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&dst.data));
+
+    // Destination dimensions
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&dst.rows));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&dst.cols));
+
+    // Element strides
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&dst_step));
+
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&backgroundRatio));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+// Packs the MOG2 constants into a _contant_struct, hands it to load_constant() and stores
+// the returned handle in cl_constants. TB is additionally cached in c_TB, which
+// getBackgroundImage2_ocl passes to its kernel.
+// varMin / varMax may be given in either order; they are sorted before being stored.
+void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
+{
+    // Sort the variance bounds using the unmodified inputs for both results. Overwriting
+    // varMin first (as was done before) made a swapped pair collapse to (varMax, varMax),
+    // silently dropping the larger bound.
+    const float varLow = cv::min(varMin, varMax);
+    const float varHigh = cv::max(varMin, varMax);
+    varMin = varLow;
+    varMax = varHigh;
+
+    c_TB = TB;
+
+    // NOTE(review): this allocation is never freed here. Whether load_constant() keeps the
+    // host pointer is not visible from this function; confirm before adding a delete.
+    _contant_struct *constants = new _contant_struct;
+    constants->c_Tb = Tb;
+    constants->c_TB = TB;
+    constants->c_Tg = Tg;
+    constants->c_varInit = varInit;
+    constants->c_varMin = varMin;
+    constants->c_varMax = varMax;
+    constants->c_tau = tau;
+    constants->c_shadowVal = shadowVal;
+
+    cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()),
+        (void *)constants, sizeof(_contant_struct));
+}
+
+// Runs "mog2_kernel" (bgfg_mog program) with a global size of frame.cols x frame.rows.
+// The kernel gets the frame, a temporary CV_32SC1 mask, the weight / mean / modesUsed /
+// variance buffers, alphaT, 1 - alphaT, prune, the shadow flag and the constants handle
+// stored by loadConstants(). The mask is then converted to CV_8U and copied into fgmaskRaw.
+// Build options: -D NMIXTURES=<nmixtures>, plus -D CN1 when cn == 1.
+void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance,
+                                oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
+{
+    typedef std::pair<size_t, const void*> KernelArg;
+
+    // Temporary int mask written by the kernel.
+    oclMat fgmask(fgmaskRaw.size(), CV_32SC1);
+
+    Context* clCxt = Context::getContext();
+
+    const float alpha1 = 1.0f - alphaT;
+
+    // The bool is passed to the kernel as a cl_int holding 0 or 1.
+    cl_int detectShadows_flag = detectShadows ? 1 : 0;
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {frame.cols, frame.rows, 1};
+
+    // Row strides expressed in elements: byte step divided by element size.
+    int frame_step = (int)(frame.step / frame.elemSize());
+    int fgmask_step = (int)(fgmask.step / fgmask.elemSize());
+    int weight_step = (int)(weight.step / weight.elemSize());
+    int modesUsed_step = (int)(modesUsed.step / modesUsed.elemSize());
+    int mean_step = (int)(mean.step / mean.elemSize());
+    int var_step = (int)(variance.step / variance.elemSize());
+
+    // Split each byte offset into a row index and a column index counted in elements.
+    int fgmask_offset_y = (int)(fgmask.offset / fgmask.step);
+    int fgmask_offset_x = (int)(fgmask.offset % fgmask.step) / (int)fgmask.elemSize();
+
+    int frame_offset_y = (int)(frame.offset / frame.step);
+    int frame_offset_x = (int)(frame.offset % frame.step) / (int)frame.elemSize();
+
+    String kernel_name = "mog2_kernel";
+    std::vector<KernelArg> args;
+
+    char build_option[50];
+    if(cn != 1)
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }
+
+    // Argument order is kept exactly as before; presumably it mirrors the kernel signature.
+    // Buffers
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&frame.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&fgmask.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&modesUsed.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&variance.data));
+
+    // Frame dimensions
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame.rows));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame.cols));
+
+    // Element strides
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&fgmask_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&modesUsed_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&var_step));
+
+    // Update rates
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&alphaT));
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&alpha1));
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&prune));
+
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&detectShadows_flag));
+
+    // Element offsets of the mask and of the frame, then the constants handle
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&fgmask_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&fgmask_offset_y));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&frame_offset_y));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&cl_constants));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+
+    // Narrow the int mask to 8 bits in place, then copy it into the caller's mask.
+    fgmask.convertTo(fgmask, CV_8U);
+    fgmask.copyTo(fgmaskRaw);
+}
+
+// Runs "getBackgroundImage2_kernel" (bgfg_mog program) with a global size of
+// modesUsed.cols x modesUsed.rows. The kernel receives the modesUsed / weight / mean buffers,
+// the destination image, the cached c_TB value, the element strides and dst's element offset.
+// Build options: -D NMIXTURES=<nmixtures>, plus -D CN1 when cn == 1.
+void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
+{
+    typedef std::pair<size_t, const void*> KernelArg;
+
+    Context* clCxt = Context::getContext();
+
+    size_t local_thread[] = {32, 8, 1};
+    size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1};
+
+    // Row strides expressed in elements: byte step divided by element size.
+    int weight_step = (int)(weight.step / weight.elemSize());
+    int modesUsed_step = (int)(modesUsed.step / modesUsed.elemSize());
+    int mean_step = (int)(mean.step / mean.elemSize());
+    int dst_step = (int)(dst.step / dst.elemSize());
+
+    // Split dst's byte offset into a row index and a column index counted in elements.
+    int dst_y = (int)(dst.offset / dst.step);
+    int dst_x = (int)(dst.offset % dst.step) / (int)dst.elemSize();
+
+    String kernel_name = "getBackgroundImage2_kernel";
+    std::vector<KernelArg> args;
+
+    char build_option[50];
+    if(cn != 1)
+    {
+        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
+    }else
+    {
+        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
+    }
+
+    // Argument order is kept exactly as before; presumably it mirrors the kernel signature.
+    // Buffers, followed by the background threshold cached by loadConstants()
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&modesUsed.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&weight.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&mean.data));
+    args.push_back(KernelArg(sizeof(cl_mem), (void*)&dst.data));
+    args.push_back(KernelArg(sizeof(cl_float), (void*)&c_TB));
+
+    // Dimensions
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&modesUsed.rows));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&modesUsed.cols));
+
+    // Element strides
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&modesUsed_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&weight_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&mean_step));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&dst_step));
+
+    // Destination element offset
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&dst_x));
+    args.push_back(KernelArg(sizeof(cl_int), (void*)&dst_y));
+
+    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
+}
+
+/////////////////////////////////////////////////////////////////
+// MOG2
+
+namespace mog2
+{
+    // default parameters of the Gaussian background detection algorithm (used by the MOG2 constructor)
+    const int defaultHistory = 500; // history length in frames; when no usable rate is given, operator() uses 1 / min(2 * nframes, history)
+    const float defaultVarThreshold = 4.0f * 4.0f; // passed to loadConstants() as Tb
+    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
+    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
+    const float defaultVarThresholdGen = 3.0f * 3.0f; // passed to loadConstants() as Tg
+    const float defaultVarInit = 15.0f; // initial variance for new components
+    const float defaultVarMax = 5.0f * defaultVarInit; // upper variance bound
+    const float defaultVarMin = 4.0f; // lower variance bound
+
+    // additional parameters
+    const float defaultfCT = 0.05f; // complexity reduction prior constant; 0 means no reduction of the number of components
+    const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+    const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
+}
+
+// Creates an empty MOG2 model. A non-positive nmixtures selects mog2::defaultNMixtures;
+// every other parameter starts at its mog2:: default and shadow detection is switched on.
+cv::ocl::MOG2::MOG2(int nmixtures) : frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+    if (nmixtures > 0)
+        nmixtures_ = nmixtures;
+    else
+        nmixtures_ = mog2::defaultNMixtures;
+
+    // History length and complexity-reduction prior
+    history = mog2::defaultHistory;
+    fCT = mog2::defaultfCT;
+
+    // Thresholds
+    varThreshold = mog2::defaultVarThreshold;
+    varThresholdGen = mog2::defaultVarThresholdGen;
+    backgroundRatio = mog2::defaultBackgroundRatio;
+
+    // Initial variance and its bounds
+    fVarInit = mog2::defaultVarInit;
+    fVarMin = mog2::defaultVarMin;
+    fVarMax = mog2::defaultVarMax;
+
+    // Shadow handling
+    bShadowDetection = true;
+    nShadowDetection = mog2::defaultnShadowDetection;
+    fTau = mog2::defaultfTau;
+}
+
+// (Re)creates the model buffers for the given frame geometry and uploads the constants.
+// Only CV_8UC1, CV_8UC3 and CV_8UC4 frames are accepted. weight_, variance_ and mean_ are
+// allocated with frameSize.height * nmixtures_ rows; every buffer is zero-filled and the
+// frame counter is reset.
+void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType)
+{
+    using namespace cv::ocl::device::mog;
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+    nframes_ = 0;
+
+    // Geometry shared by the three per-mixture buffers.
+    const int modelRows = frameSize.height * nmixtures_;
+    const int modelCols = frameSize_.width;
+    const int work_ch = CV_MAT_CN(frameType);
+
+    // mixture weights
+    weight_.create(modelRows, modelCols, CV_32FC1);
+    weight_.setTo(Scalar::all(0));
+
+    // variances
+    variance_.create(modelRows, modelCols, CV_32FC1);
+    variance_.setTo(Scalar::all(0));
+
+    // means, one value per frame channel
+    // NOTE(review): the previous comment here read "4 channels"; presumably oclMat pads
+    // 3-channel data to 4 - confirm.
+    mean_.create(modelRows, modelCols, CV_32FC(work_ch));
+    mean_.setTo(Scalar::all(0));
+
+    // number of modes in use per pixel, all zero at start
+    bgmodelUsedModes_.create(frameSize_, CV_32FC1);
+    bgmodelUsedModes_.setTo(cv::Scalar::all(0));
+
+    loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
+}
+
+// Processes one frame and writes the CV_8UC1 foreground mask into fgmask.
+// The model is (re)initialised on the first frame, when learningRate >= 1, or when the
+// frame size or channel count no longer matches the stored model. A negative learningRate,
+// or any rate on the first frame after (re)initialisation, is replaced by
+// 1 / min(2 * nframes_, history).
+void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate)
+{
+    using namespace cv::ocl::device::mog;
+
+    const int work_ch = frame.oclchannels();
+
+    const bool needInit = nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.oclchannels();
+    if (needInit)
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+    fgmask.setTo(cv::Scalar::all(0));
+
+    ++nframes_;
+
+    // Keep the caller's rate only when it is non-negative and this is not the first frame.
+    if (!(learningRate >= 0.0f && nframes_ > 1))
+        learningRate = 1.0f / std::min(2 * nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    const float prune = -learningRate * fCT;
+    mog2_ocl(frame, work_ch, fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, prune, bShadowDetection, nmixtures_);
+}
+
+// Allocates backgroundImage with the stored frame size and type, then fills it through
+// getBackgroundImage2_ocl using the current model buffers.
+void cv::ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
+{
+    backgroundImage.create(frameSize_, frameType_);
+
+    const int cn = backgroundImage.oclchannels();
+    cv::ocl::device::mog::getBackgroundImage2_ocl(cn, bgmodelUsedModes_, weight_, mean_, backgroundImage, nmixtures_);
+}
+
+// Releases all model buffers and resets the frame bookkeeping (size, type, frame count).
+void cv::ocl::MOG2::release()
+{
+    // Model buffers
+    bgmodelUsedModes_.release();
+    mean_.release();
+    variance_.release();
+    weight_.release();
+
+    // Bookkeeping; nframes_ == 0 makes the next operator() call re-initialise the model
+    nframes_ = 0;
+    frameType_ = 0;
+    frameSize_ = Size(0, 0);
+}
--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -63,6 +63,7 @@ extern const char *filter_sep_row;
 extern const char *filter_sep_col;
 extern const char *filtering_laplacian;
 extern const char *filtering_morph;
+extern const char *filtering_adaptive_bilateral;
 }
 }

@@ -1616,3 +1617,100 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
    Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
    f->apply(src, dst);
 }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adaptive Bilateral Filter
+
+// Launches the "edgeEnhancingFilter" kernel of the filtering_adaptive_bilateral program.
+//   src        - CV_8UC1 or CV_8UC3 input
+//   dst        - created here with the size and type of src
+//   ksize      - kernel size; both dimensions must be odd
+//   sigmaSpace - spatial sigma; non-positive values are replaced by 1
+//   anchor     - kernel anchor, normalised through normalizeAnchor()
+//   borderType - BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP or
+//                BORDER_REFLECT101; any other value raises CV_StsBadArg
+void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
+{
+    CV_Assert((ksize.width & 1) && (ksize.height & 1));  // ksize must be odd
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);  // source must be an 8-bit image with 1 or 3 channels
+    if( sigmaSpace <= 0 )
+        sigmaSpace = 1;
+
+    // Spatial weight table, one entry per kernel tap, filled row by row:
+    // sigma^2 / (sigma^2 + x^2 + y^2) with (x, y) measured from the kernel centre.
+    Mat lut(Size(ksize.width, ksize.height), CV_32FC1);
+    double sigma2 = sigmaSpace * sigmaSpace;
+    int idx = 0;
+    int w = ksize.width / 2;
+    int h = ksize.height / 2;
+    for(int y=-h; y<=h; y++)
+        for(int x=-w; x<=w; x++)
+    {
+        lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
+    }
+    oclMat dlut(lut);
+    int depth = src.depth();
+    int cn = src.oclchannels();
+
+    normalizeAnchor(anchor, ksize);
+    const static String kernelName = "edgeEnhancingFilter";
+
+    dst.create(src.size(), src.type());
+
+    // Name of the border macro handed to the kernel through the build options.
+    char btype[30];
+    switch(borderType)
+    {
+    case BORDER_CONSTANT:
+        sprintf(btype, "BORDER_CONSTANT");
+        break;
+    case BORDER_REPLICATE:
+        sprintf(btype, "BORDER_REPLICATE");
+        break;
+    case BORDER_REFLECT:
+        sprintf(btype, "BORDER_REFLECT");
+        break;
+    case BORDER_WRAP:
+        sprintf(btype, "BORDER_WRAP");
+        break;
+    case BORDER_REFLECT101:
+        sprintf(btype, "BORDER_REFLECT_101");
+        break;
+    default:
+        CV_Error(CV_StsBadArg, "This border type is not supported");
+        break;
+    }
+
+    //the following constants may be adjusted for performance concerns
+    const static size_t blockSizeX = 64, blockSizeY = 1;
+
+    // EXTRA depends on this call's ksize, so it must not be a function-local static: a
+    // static would be initialised on the first call only and every later call with a
+    // different kernel height would reuse the stale value.
+    const size_t EXTRA = ksize.height - 1;
+
+    //Normalize the result by default
+    const float alpha = ksize.height * ksize.width;
+
+    // gSize below is blockSizeX minus the even part of ksize.width. Reject widths that
+    // would make it zero (division by zero) or wrap around as an unsigned value.
+    CV_Assert(ksize.width / 2 * 2 < (int)blockSizeX);
+
+    // Horizontal global size: blockSizeX threads for every gSize source columns, rounded up.
+    const size_t gSize = blockSizeX - ksize.width / 2 * 2;
+    const size_t globalSizeX = (src.cols) % gSize == 0 ?
+        src.cols / gSize * blockSizeX :
+        (src.cols / gSize + 1) * blockSizeX;
+    // Vertical global size: one thread per rows_per_thread rows, rounded up to blockSizeY.
+    const size_t rows_per_thread = 1 + EXTRA;
+    const size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
+        ((src.rows + rows_per_thread - 1) / rows_per_thread) :
+        (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
+
+    size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
+    size_t localThreads[3]  = { blockSizeX, blockSizeY, 1};
+
+    char build_options[250];
+
+    //LDATATYPESIZE is sizeof local data store. This is to exemplify effect of LDS on kernel performance
+    sprintf(build_options,
+        "-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
+        " -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
+        static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
+
+    // Kernel arguments, in the order they were originally registered.
+    std::vector<std::pair<size_t , const void *> > args;
+    args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
+    args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+    args.push_back(std::make_pair(sizeof(cl_mem), &dlut.data));
+    int lut_step = dlut.step1();
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&lut_step));
+
+    openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
+        globalThreads, localThreads, args, cn, depth, build_options);
+}
--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@@ -46,16 +46,62 @@
 #include <iomanip>
 #include "precomp.hpp"

+namespace cv { namespace ocl {
+
+// One-time setup / teardown of the clAmdBlas library, shared so that gemm() no longer
+// initialises and tears the library down on every call.
+void clBlasSetup();
+void clBlasTeardown();
+
+}} /* namespace cv { namespace ocl */
+
+
 #if !defined HAVE_CLAMDBLAS
 void cv::ocl::gemm(const oclMat&, const oclMat&, double,
                   const oclMat&, double, oclMat&, int)
 {
    CV_Error(Error::StsNotImplemented, "OpenCL BLAS is not implemented");
 }
+
+void cv::ocl::clBlasSetup() // variant compiled when HAVE_CLAMDBLAS is not defined: always raises "not implemented"
+{
+    CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
+}
+
+void cv::ocl::clBlasTeardown() // variant compiled when HAVE_CLAMDBLAS is not defined
+{
+    // Intentionally empty: Info::release() and Info::~Info() call this unconditionally,
+    // so unlike clBlasSetup() it does not raise an error.
+}
+
 #else
 #include "clAmdBlas.h"
 using namespace cv;

+// Tracks whether clAmdBlasSetup() has been called; every access is made while holding cs.
+static bool clBlasInitialized = false;
+static Mutex cs;
+
+// Initialises clAmdBlas on first use; later calls are no-ops until clBlasTeardown() runs.
+// An error reported by clAmdBlasSetup() goes through openCLSafeCall and leaves the flag unset.
+void cv::ocl::clBlasSetup()
+{
+    // The flag is a plain bool, so reading it outside the lock (the former unlocked first
+    // check) races with the locked writes here and in clBlasTeardown(). Take the lock
+    // unconditionally; its cost is negligible next to a GEMM call.
+    AutoLock al(cs);
+    if(!clBlasInitialized)
+    {
+        openCLSafeCall(clAmdBlasSetup());
+        clBlasInitialized = true;
+    }
+}
+
+// Shuts clAmdBlas down if clBlasSetup() initialised it; otherwise does nothing.
+void cv::ocl::clBlasTeardown()
+{
+    AutoLock al(cs);
+
+    // Nothing to undo when the library was never set up (or was already torn down).
+    if(!clBlasInitialized)
+        return;
+
+    clAmdBlasTeardown();
+    clBlasInitialized = false;
+}
+
 void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
                   const oclMat &src3, double beta, oclMat &dst, int flags)
 {
@@ -71,7 +117,8 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
        dst.create(src1.rows, src2.cols, src1.type());
        dst.setTo(Scalar::all(0));
    }
-    openCLSafeCall( clAmdBlasSetup() );
+
+    clBlasSetup();

    const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
    const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
@@ -156,6 +203,5 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
    }
    break;
    }
-    clAmdBlasTeardown();
 }
 #endif
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -65,6 +65,7 @@ namespace cv
    namespace ocl
    {
        extern void fft_teardown();
+        extern void clBlasTeardown();
        /*
         * The binary caching system to eliminate redundant program source compilation.
         * Strictly, this is not a cache because we do not implement evictions right now.
@@ -1058,6 +1059,7 @@ namespace cv
        void Info::release()
        {
            fft_teardown();
+            clBlasTeardown();
            impl->release();
            impl = new Impl;
            DeviceName.clear();
@@ -1067,6 +1069,7 @@ namespace cv
        Info::~Info()
        {
            fft_teardown();
+            clBlasTeardown();
            impl->release();
        }

--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -0,0 +1,535 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// Pixel helpers. With CN1 defined a pixel is a single uchar and the model values are
+// floats; otherwise a pixel is a uchar4 and the model values are float4, where only the
+// x, y and z components take part in sqr(), sum() and clamp1().
+#if defined (CN1)
+#define T_FRAME uchar
+#define T_MEAN_VAR float
+#define CONVERT_TYPE convert_uchar_sat
+#define F_ZERO (0.0f)
+// Widens a pixel to float.
+float cvt(uchar val)
+{
+    return (float)val;
+}
+
+// Square of a value.
+float sqr(float val)
+{
+    float squared = val * val;
+    return squared;
+}
+
+// Identity in the single-channel case.
+float sum(float val)
+{
+    return val;
+}
+
+// Moves var towards diff^2 by learningRate and keeps the result at or above minVar.
+float clamp1(float var, float learningRate, float diff, float minVar)
+{
+    float updated = var + learningRate * (diff * diff - var);
+    return fmax(updated, minVar);
+}
+#else
+#define T_FRAME uchar4
+#define T_MEAN_VAR float4
+#define CONVERT_TYPE convert_uchar4_sat
+#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+// Widens every component of a pixel to float.
+float4 cvt(const uchar4 val)
+{
+    return (float4)(val.x, val.y, val.z, val.w);
+}
+
+// Sum of the squared x, y and z components (w is ignored).
+float sqr(const float4 val)
+{
+    float squared = val.x * val.x + val.y * val.y + val.z * val.z;
+    return squared;
+}
+
+// Sum of the x, y and z components (w is ignored).
+float sum(const float4 val)
+{
+    float total = (val.x + val.y + val.z);
+    return total;
+}
+
+// Per-component version of the single-channel clamp1 for x, y and z; w is set to zero.
+float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
+{
+    float vx = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
+    float vy = fmax(var.y + learningRate * (diff.y * diff.y - var.y), minVar);
+    float vz = fmax(var.z + learningRate * (diff.z * diff.z - var.z), minVar);
+    return (float4)(vx, vy, vz, 0.0f);
+}
+#endif
+
+typedef struct // constants block; presumably the device-side mirror of the host struct filled in loadConstants() - confirm against the kernels that use it
+{
+    float c_Tb; // field names and order follow the assignments in the host's loadConstants()
+    float c_TB;
+    float c_Tg;
+    float c_varInit;
+    float c_varMin;
+    float c_varMax;
+    float c_tau;
+    uchar c_shadowVal;
+}con_srtuct_t; // NOTE(review): "srtuct" looks like a typo for "struct"; renaming requires updating every user
+
+// Exchanges the float stored for pixel (x, y) in plane k with the one in plane k + 1.
+// Planes are stacked vertically, `rows` rows each, with a row stride of ptr_step elements.
+void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    const int lower = (k * rows + y) * ptr_step + x;
+    const int upper = ((k + 1) * rows + y) * ptr_step + x;
+
+    const float saved = ptr[lower];
+    ptr[lower] = ptr[upper];
+    ptr[upper] = saved;
+}
+
+// float4 counterpart of swap(): exchanges the entry for pixel (x, y) in plane k with the
+// one in plane k + 1, using the same vertically stacked layout.
+void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    const int lower = (k * rows + y) * ptr_step + x;
+    const int upper = ((k + 1) * rows + y) * ptr_step + x;
+
+    const float4 saved = ptr[lower];
+    ptr[lower] = ptr[upper];
+    ptr[upper] = saved;
+}
+
+// Classifies each pixel against the stored mixture model without updating the model.
+// One work-item handles pixel (x, y). The weight / mean / var buffers hold NMIXTURES planes
+// stacked vertically (frame_row rows each); all *_step values are row strides in elements.
+// fgmask receives 255 for foreground and 0 for background.
+__kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
+    __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
+    int frame_row, int frame_col, int frame_step, int fgmask_step,
+    int weight_step, int mean_step, int var_step,
+    float varThreshold, float backgroundRatio, int fgmask_offset_x,
+    int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    // Work-items outside the frame do nothing.
+    if (x < frame_col && y < frame_row)
+    {
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+
+        int kHit = -1;
+        int kForeground = -1;
+
+        // Find the first mixture whose squared distance to the pixel is below
+        // varThreshold times its summed variance.
+        for (int k = 0; k < (NMIXTURES); ++k)
+        {
+            // 1.192092896e-07f is the value of FLT_EPSILON: stop at the first mixture
+            // whose weight is effectively zero.
+            if (weight[(k * frame_row + y) * weight_step + x] < 1.192092896e-07f)
+                break;
+
+            T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+            // The row index is multiplied by the stride, as for mean above; the previous
+            // "+ var_step" read the variance from an unrelated element.
+            T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
+
+            T_MEAN_VAR diff = pix - mu;
+
+            if (sqr(diff) < varThreshold * sum(_var))
+            {
+                kHit = k;
+                break;
+            }
+        }
+
+        // On a hit, kForeground becomes the number of leading mixtures whose cumulative
+        // weight first exceeds backgroundRatio.
+        if (kHit >= 0)
+        {
+            float wsum = 0.0f;
+            for (int k = 0; k < (NMIXTURES); ++k)
+            {
+                wsum += weight[(k * frame_row + y) * weight_step + x];
+
+                if (wsum > backgroundRatio)
+                {
+                    kForeground = k + 1;
+                    break;
+                }
+            }
+        }
+        // Foreground when nothing matched or the matched mixture lies at or beyond kForeground.
+        if(kHit < 0 || kHit >= kForeground)
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (-1);
+        else
+            fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar) (0);
+    }
+}
+
+// MOG background-model update for the pixel (x, y) owned by this work-item.
+// The modes are scanned in stored order for the first one matching the pixel; a match is
+// updated and moved up while its sort key beats its predecessor's, otherwise a mode is
+// (re)initialised from the pixel. Weights and sort keys are then renormalised and the mask
+// written. Mode k of a pixel is stored at [(k * frame_row + y) * <buffer>_step + x].
+__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask,
+    __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean,
+    __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step,
+    int weight_step, int sortKey_step, int mean_step, int var_step,
+    float varThreshold, float backgroundRatio, float learningRate, float minVar,
+    int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y)
+{
+    const float w0 = 0.05f;        // weight of a freshly created mode
+    const float sk0 = w0 / 30.0f;  // its sort key: w0 / sqrt(var0)
+    const float var0 = 900.f;      // its per-channel variance
+
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    if(x >= frame_col || y >= frame_row) return;
+
+    float wsum = 0.0f;
+    int kHit = -1;          // slot where the matched mode ends up, -1 if none matched
+    int kForeground = -1;   // 1-based count of modes needed to exceed backgroundRatio
+    int k = 0;
+    T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
+
+    for (; k < (NMIXTURES); ++k)
+    {
+        float w = weight[(k * frame_row + y) * weight_step + x];
+        wsum += w;
+        if (w < 1.192092896e-07f)   // weight below float epsilon: stop scanning here
+            break;
+
+        T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
+        T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
+
+        // Mode k does not match the pixel: go on to the next mode instead of stopping.
+        if (!(sqr(pix - mu) < varThreshold * sum(_var)))
+            continue;
+
+        wsum -= w;
+        float dw = learningRate * (1.0f - w);
+
+        _var = clamp1(_var, learningRate, pix - mu, minVar);
+        // sort key = weight / sqrt(total variance), the scale sk0 uses (w0 / sqrt(var0))
+        float sortKey_prev = w / sqrt(sum(_var));
+        sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
+
+        float weight_prev = w + dw;
+        weight[(k * frame_row + y) * weight_step + x] = weight_prev;
+
+        T_MEAN_VAR mean_prev = mu + learningRate * (pix - mu);
+        mean[(k * frame_row + y) * mean_step + x] = mean_prev;
+
+        T_MEAN_VAR var_prev = _var;
+        var[(k * frame_row + y) * var_step + x] = var_prev;
+
+        // Move the updated mode towards index 0 while the mode in front of it has a
+        // smaller sort key; k1 + 1 is the slot the matched mode finally occupies.
+        int k1 = k - 1;
+        if (k1 >= 0)
+        {
+            float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
+            float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
+            T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
+            T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
+
+            for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
+            {
+                sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev;
+                sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next;
+                weight[(k1 * frame_row + y) * weight_step + x] = weight_prev;
+                weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next;
+                mean[(k1 * frame_row + y) * mean_step + x] = mean_prev;
+                mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next;
+                var[(k1 * frame_row + y) * var_step + x] = var_prev;
+                var[((k1 + 1) * frame_row + y) * var_step + x] = var_next;
+
+                sortKey_prev = sortKey_next;
+                sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f;
+
+                weight_prev = weight_next;
+                weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f;
+
+                mean_prev = mean_next;
+                mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO;
+
+                var_prev = var_next;
+                var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO;
+            }
+        }
+
+        kHit = k1 + 1;
+        break;
+    }
+
+    if (kHit < 0)   // no mode matched: (re)initialise slot k (clamped to the last slot) from the pixel
+    {
+        kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1);
+        wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
+
+        weight[(k * frame_row + y) * weight_step + x] = w0;
+        mean[(k * frame_row + y) * mean_step + x] = pix;
+#if defined (CN1)
+        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
+#else
+        var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
+#endif
+        sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
+    }
+    else
+    {
+        for( ; k < (NMIXTURES); k++)
+            wsum += weight[(k * frame_row + y) * weight_step + x];
+    }
+    // Renormalise weights and sort keys by 1 / wsum and find kForeground on the way.
+    float wscale = 1.0f / wsum;
+    wsum = 0;
+    for (k = 0; k < (NMIXTURES); ++k)
+    {
+        float w = weight[(k * frame_row + y) * weight_step + x];
+        w *= wscale;
+        wsum += w;
+
+        weight[(k * frame_row + y) * weight_step + x] = w;
+        sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale;
+
+        kForeground = select(kForeground, k + 1, wsum > backgroundRatio && kForeground < 0);
+    }
+    fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-(kHit >= kForeground));
+}
+
+
+// Writes the background estimate for pixel (x, y): the weighted average of the means of
+// its leading modes, accumulated until the summed weight exceeds backgroundRatio.
+__kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst,
+    int dst_row, int dst_col, int weight_step, int mean_step, int dst_step,
+    float backgroundRatio)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+    if (x >= dst_col || y >= dst_row)
+        return;
+
+    T_MEAN_VAR accum = (T_MEAN_VAR)F_ZERO;
+    float weightSum = 0.0f;
+    for (int k = 0; k < (NMIXTURES); ++k)
+    {
+        const int plane = k * dst_row + y;   // row of mode k for this pixel row
+        const float w = weight[plane * weight_step + x];
+        const T_MEAN_VAR m = mean[plane * mean_step + x];
+
+        accum = accum + w * m;
+        weightSum += w;
+        if (weightSum > backgroundRatio)
+            break;
+    }
+
+    accum = accum * (1.f / weightSum);
+    dst[y * dst_step + x] = CONVERT_TYPE(accum);
+}
+// MOG2 update for the pixel of this work-item: updates or creates mixture modes, then writes 0 (background), c_shadowVal (shadow) or 255 to fgmask.
+__kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
+        __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step,
+        int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune,
+        int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    // work-items outside the frame do nothing
+    if(x < frame_col && y < frame_row)
+    {
+        T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + x + frame_offset_x]);
+
+        bool background = false; // true - the pixel classified as background
+
+        bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+
+        int nmodes = modesUsed[y * modesUsed_step + x];
+        int nNewModes = nmodes; //current number of modes in GMM
+
+        float totalWeight = 0.0f;
+        // Pass over the used modes: rescale every weight (alpha1 * w + prune) and update the first mode the pixel fits.
+        for (int mode = 0; mode < nmodes; ++mode)
+        {
+            float _weight = alpha1 * weight[(mode * frame_row + y) * weight_step + x] + prune;
+
+            if (!fitsPDF)
+            {
+                float var = variance[(mode * frame_row + y) * var_step + x];
+
+                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
+
+                T_MEAN_VAR diff = _mean - pix;
+                float dist2 = sqr(diff);
+                // background if a mode within c_Tb * var is reached before the accumulated weight reaches c_TB
+                if (totalWeight < constants -> c_TB && dist2 < constants -> c_Tb * var)
+                    background = true;
+                // pixel fits this mode (within c_Tg * var): pull its mean and variance towards the pixel
+                if (dist2 < constants -> c_Tg * var)
+                {
+                    fitsPDF = true;
+                    _weight += alphaT;
+                    float k = alphaT / _weight;
+                    mean[(mode * frame_row + y) * mean_step + x] = _mean - k * diff;
+                    float varnew = var + k * (dist2 - var);
+                    varnew = fmax(varnew, constants -> c_varMin);
+                    varnew = fmin(varnew, constants -> c_varMax);
+                    // move the mode towards index 0 until a predecessor has a larger weight (swap helpers presumably exchange entries i-1 and i)
+                    variance[(mode * frame_row + y) * var_step + x] = varnew;
+                    for (int i = mode; i > 0; --i)
+                    {
+                        if (_weight < weight[((i - 1) * frame_row + y) * weight_step + x])
+                            break;
+                        swap(weight, x, y, i - 1, frame_row, weight_step);
+                        swap(variance, x, y, i - 1, frame_row, var_step);
+                        #if defined (CN1)
+                        swap(mean, x, y, i - 1, frame_row, mean_step);
+                        #else
+                        swap4(mean, x, y, i - 1, frame_row, mean_step);
+                        #endif
+                    }
+                }
+            } // !fitsPDF
+            // NOTE(review): nmodes is decremented here (shortening this loop) but reset to nNewModes below, so pruning is not persisted - confirm intended
+            if (_weight < -prune)
+            {
+                _weight = 0.0;
+                nmodes--;
+            }
+
+            weight[(mode * frame_row + y) * weight_step + x] = _weight; //update weight by the calculated value
+            totalWeight += _weight;
+        }
+        // scale the stored weights of the first nmodes modes by 1 / totalWeight
+        totalWeight = 1.f / totalWeight;
+        for (int mode = 0; mode < nmodes; ++mode)
+            weight[(mode * frame_row + y) * weight_step + x] *= totalWeight;
+
+        nmodes = nNewModes;
+        // no mode fitted the pixel: append a new mode, or overwrite the last one when all NMIXTURES are in use
+        if (!fitsPDF)
+        {
+            int mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++;
+
+            if (nmodes == 1)
+                weight[(mode * frame_row + y) * weight_step + x] = 1.f;
+            else
+            {
+                weight[(mode * frame_row + y) * weight_step + x] = alphaT;
+
+                for (int i = 0; i < nmodes - 1; ++i)
+                    weight[(i * frame_row + y) * weight_step + x] *= alpha1;
+            }
+
+            mean[(mode * frame_row + y) * mean_step + x] = pix;
+            variance[(mode * frame_row + y) * var_step + x] = constants -> c_varInit;
+            // move the new mode up past predecessors whose weight does not exceed alphaT
+            for (int i = nmodes - 1; i > 0; --i)
+            {
+                // check one up
+                if (alphaT < weight[((i - 1) * frame_row + y) * weight_step + x])
+                    break;
+
+                swap(weight, x, y, i - 1, frame_row, weight_step);
+                swap(variance, x, y, i - 1, frame_row, var_step);
+                #if defined (CN1)
+                swap(mean, x, y, i - 1, frame_row, mean_step);
+                #else
+                swap4(mean, x, y, i - 1, frame_row, mean_step);
+                #endif
+            }
+        }
+
+        modesUsed[y * modesUsed_step + x] = nmodes;
+        // shadow test, only when enabled and the pixel was not classified as background
+        bool isShadow = false;
+        if (detectShadows_flag && !background)
+        {
+            float tWeight = 0.0f;
+
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                T_MEAN_VAR _mean = mean[(mode * frame_row + y) * mean_step + x];
+
+                T_MEAN_VAR pix_mean = pix * _mean;
+
+                float numerator = sum(pix_mean);
+                float denominator = sqr(_mean);
+
+                if (denominator == 0)
+                    break;
+                // candidate only when a = numerator / denominator lies in [c_tau, 1]
+                if (numerator <= denominator && numerator >= constants -> c_tau * denominator)
+                {
+                    float a = numerator / denominator;
+
+                    T_MEAN_VAR dD = a * _mean - pix;
+
+                    if (sqr(dD) < constants -> c_Tb * variance[(mode * frame_row + y) * var_step + x] * a * a)
+                    {
+                        isShadow = true;
+                        break;
+                    }
+                }
+                // stop once the modes examined so far carry more than c_TB of the weight
+                tWeight += weight[(mode * frame_row + y) * weight_step + x];
+                if (tWeight > constants -> c_TB)
+                    break;
+            }
+        }
+
+        fgmask[(y + fgmask_offset_y) * fgmask_step + x + fgmask_offset_x] = background ? 0 : isShadow ? constants -> c_shadowVal : 255;
+    }
+}
+
+// Writes the background estimate for pixel (x, y): the weighted average of the means of
+// its leading used modes, accumulated until the summed weight exceeds c_TB. The result
+// is stored in dst at the position shifted by (dst_x, dst_y).
+__kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
+    __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step,
+    int mean_step, int dst_step, int dst_x, int dst_y)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+    if (x >= modesUsed_col || y >= modesUsed_row)
+        return;
+
+    const int usedModes = modesUsed[y * modesUsed_step + x];
+    T_MEAN_VAR accum = (T_MEAN_VAR)F_ZERO;
+    float weightSum = 0.0f;
+
+    for (int k = 0; k < usedModes; ++k)
+    {
+        const int plane = k * modesUsed_row + y;   // row of mode k for this pixel row
+        const float w = weight[plane * weight_step + x];
+        const T_MEAN_VAR m = mean[plane * mean_step + x];
+
+        accum = accum + w * m;
+        weightSum += w;
+        if (weightSum > c_TB)
+            break;
+    }
+
+    // divide by the accumulated weight (as a multiplication by its reciprocal)
+    accum = accum * (1.f / weightSum);
+    dst[(y + dst_y) * dst_step + x + dst_x] = CONVERT_TYPE(accum);
+}
--- a/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
+++ b/modules/ocl/src/opencl/filtering_adaptive_bilateral.cl
@@ -0,0 +1,424 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Harris Gasparakis, harris.gasparakis@amd.com
+//    Xiaopeng Fu, fuxiaopeng2222@163.com
+//    Yao Wang, bitwangyaoyao@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+// Border index remapping, selected by the BORDER_* build define. ADDR_L / ADDR_H remap an index below the
+// low edge; ADDR_R / ADDR_B remap an index at or past the high edge and otherwise yield `addr` unchanged.
+#ifdef BORDER_REPLICATE
+//BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (l_edge)   : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (r_edge)-1 : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   :(i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (b_edge)-1 :(addr))
+#endif
+
+#ifdef BORDER_REFLECT
+//BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)-1               : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)-1 : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
+#endif
+
+#ifdef BORDER_REFLECT_101
+//BORDER_REFLECT_101:   gfedcb|abcdefgh|gfedcba
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)                 : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)                 : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
+#endif
+
+//blur function does not support BORDER_WRAP
+#ifdef BORDER_WRAP
+//BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
+#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (i)+(r_edge) : (i))
+#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
+#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (i)+(b_edge) : (i))
+#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
+#endif
+
+// Adaptive bilateral ("edge enhancing") filter for 4-channel 8-bit images.
+// Each work-group stages a (ksY+EXTRA) x THREADS tile of source pixels (border handling
+// applied) in local memory; the first THREADS-ksX+1 work-items then each produce 1+EXTRA
+// vertically adjacent output pixels. By default a neighbour's weight is
+// var / (var + (value - centre)^2) * lut[...], with var the window variance estimate.
+// `alpha` is part of the kernel signature but is not read here.
+__kernel void
+edgeEnhancingFilter_C4_D0(
+    __global const uchar4 * restrict src,
+    __global uchar4 *dst,
+    float alpha,
+    int src_offset,
+    int src_whole_rows,
+    int src_whole_cols,
+    int src_step,
+    int dst_offset,
+    int dst_rows,
+    int dst_cols,
+    int dst_step,
+    __global const float* lut,
+    int lut_step)
+{
+    int col = get_local_id(0);
+    const int gX = get_group_id(0);
+    const int gY = get_group_id(1);
+
+    int src_x_off = (src_offset % src_step) >> 2;
+    int src_y_off = src_offset / src_step;
+    int dst_x_off = (dst_offset % dst_step) >> 2;
+    int dst_y_off = dst_offset / dst_step;
+
+    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
+    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
+    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
+    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
+    int posX = dst_startX - dst_x_off + col;
+    int posY = (gY * (1+EXTRA));
+
+    __local uchar4 data[ksY+EXTRA][THREADS];
+
+    float4 tmp_sum[1+EXTRA];
+    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
+    {
+        tmp_sum[tmpint] = (float4)(0,0,0,0);
+    }
+
+#ifdef BORDER_CONSTANT
+    bool con;
+    uchar4 ss;
+    for(int j = 0; j < ksY+EXTRA; j++)
+    {
+        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
+
+        int cur_col = clamp(startX + col, 0, src_whole_cols);
+        if(con)
+        {
+            ss = src[(startY+j)*(src_step>>2) + cur_col];
+        }
+
+        data[j][col] = con ? ss : (uchar4)0;
+    }
+#else
+    for(int j= 0; j < ksY+EXTRA; j++)
+    {
+        int selected_row;
+        int selected_col;
+        selected_row = ADDR_H(startY+j, 0, src_whole_rows);
+        selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
+
+        selected_col = ADDR_L(startX+col, 0, src_whole_cols);
+        selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
+        data[j][col] = src[selected_row * (src_step>>2) + selected_col];
+    }
+#endif
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float4 var[1+EXTRA];
+
+#if VAR_PER_CHANNEL
+    float4 weight;
+    float4 totalWeight = (float4)(0,0,0,0);
+#else
+    float weight;
+    float totalWeight = 0;
+#endif
+
+    int4 currValCenter;
+    int4 currWRTCenter;
+
+    int4 sumVal = 0;
+    int4 sumValSqr = 0;
+
+    if(col < (THREADS-(ksX-1)))
+    {
+        int4 currVal;
+
+        int howManyAll = (2*anX+1)*(ksY);
+
+        //find variance of all data
+        int startLMj;
+        int endLMj ;
+        // NOTE(review): the sums cover ksY-1 rows while howManyAll counts ksY rows - confirm intended
+        for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
+        {
+#if CALCVAR
+            startLMj = extraCnt;
+            endLMj =  ksY+extraCnt-1;
+            sumVal =0;
+            sumValSqr=0;
+            for(int j = startLMj; j < endLMj; j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+                    currVal = convert_int4(data[j][col+anX+i]);
+
+                    sumVal += currVal;
+                    sumValSqr += mul24(currVal, currVal);
+                }
+            }
+            var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+#else
+            var[extraCnt] = (float4)(900.0f, 900.0f, 900.0f, 0.0f);
+#endif
+        }
+
+        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
+        {
+
+            // rows of the local tile covered by the window of this output pixel:
+            startLMj = extraCnt;
+            // [extraCnt, extraCnt + ksY); the tile holds ksY+EXTRA rows, so the
+            // window never leaves it
+            endLMj =  extraCnt + ksY;
+
+            // pixel at the centre of the window
+            currValCenter = convert_int4( data[ (startLMj + endLMj)/2][col+anX] );
+
+            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+#if FIXED_WEIGHT
+#if VAR_PER_CHANNEL
+                    weight.x = 1.0f;
+                    weight.y = 1.0f;
+                    weight.z = 1.0f;
+                    weight.w = 1.0f;
+#else
+                    weight = 1.0f;
+#endif
+#else
+                    currVal = convert_int4(data[j][col+anX+i]);
+                    currWRTCenter = currVal-currValCenter;
+
+#if VAR_PER_CHANNEL
+                    weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) * (float4)(lut[lut_j*lut_step+anX+i]);
+#else
+                    weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) +  mul24(currWRTCenter.z, currWRTCenter.z))/(var[extraCnt].x+var[extraCnt].y+var[extraCnt].z));
+#endif
+#endif
+                    tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
+                    totalWeight += weight;
+                }
+            }
+
+            tmp_sum[extraCnt] /= totalWeight;
+
+            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
+            {
+                dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
+            }
+
+#if VAR_PER_CHANNEL
+            totalWeight = (float4)(0,0,0,0);
+#else
+            totalWeight = 0;
+#endif
+        }
+    }
+}
+
+
+// Adaptive bilateral ("edge enhancing") filter for single-channel 8-bit images.
+// Each work-group stages a (ksY+EXTRA) x THREADS tile of source pixels (border handling
+// applied) in local memory; the first THREADS-ksX+1 work-items then each produce 1+EXTRA
+// vertically adjacent output pixels, weighting a neighbour by
+// var / (var + (value - centre)^2) * lut[...]. `alpha` is not read by the kernel.
+__kernel void
+edgeEnhancingFilter_C1_D0(
+    __global const uchar * restrict src,
+    __global uchar *dst,
+    float alpha,
+    int src_offset,
+    int src_whole_rows,
+    int src_whole_cols,
+    int src_step,
+    int dst_offset,
+    int dst_rows,
+    int dst_cols,
+    int dst_step,
+    __global const float * lut,
+    int lut_step)
+{
+    int col = get_local_id(0);
+    const int gX = get_group_id(0);
+    const int gY = get_group_id(1);
+
+    int src_x_off = (src_offset % src_step);
+    int src_y_off = src_offset / src_step;
+    int dst_x_off = (dst_offset % dst_step);
+    int dst_y_off = dst_offset / dst_step;
+
+    int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
+    int startY = (gY * (1+EXTRA)) - anY + src_y_off;
+    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
+    int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
+    int posX = dst_startX - dst_x_off + col;
+    int posY = (gY * (1+EXTRA));
+
+    __local uchar data[ksY+EXTRA][THREADS];
+
+    float tmp_sum[1+EXTRA];
+    for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
+    {
+        tmp_sum[tmpint] = (float)(0);
+    }
+
+#ifdef BORDER_CONSTANT
+    bool con;
+    uchar ss;
+    for(int j = 0; j < ksY+EXTRA; j++)
+    {
+        con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
+
+        int cur_col = clamp(startX + col, 0, src_whole_cols);
+        if(con)
+        {
+            ss = src[(startY+j)*(src_step) + cur_col];
+        }
+
+        data[j][col] = con ? ss : 0;
+    }
+#else
+    for(int j= 0; j < ksY+EXTRA; j++)
+    {
+        int selected_row;
+        int selected_col;
+        selected_row = ADDR_H(startY+j, 0, src_whole_rows);
+        selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
+
+        selected_col = ADDR_L(startX+col, 0, src_whole_cols);
+        selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
+        data[j][col] = src[selected_row * (src_step) + selected_col];
+    }
+#endif
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float var[1+EXTRA];
+
+    float weight;
+    float totalWeight = 0;
+
+    int currValCenter;
+    int currWRTCenter;
+
+    int sumVal = 0;
+    int sumValSqr = 0;
+
+    if(col < (THREADS-(ksX-1)))
+    {
+        int currVal;
+
+        int howManyAll = (2*anX+1)*(ksY);
+
+        //find variance of all data
+        int startLMj;
+        int endLMj;
+        // NOTE(review): the sums cover ksY-1 rows while howManyAll counts ksY rows - confirm intended
+        for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
+        {
+#if CALCVAR
+            startLMj = extraCnt;
+            endLMj =  ksY+extraCnt-1;
+            sumVal = 0;
+            sumValSqr =0;
+            for(int j = startLMj; j < endLMj; j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+                    currVal = (int)(data[j][col+anX+i]);
+                    sumVal += currVal;
+                    sumValSqr += mul24(currVal, currVal);
+                }
+            }
+            var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) /  ( (float)(howManyAll*howManyAll) ) ;
+#else
+            var[extraCnt] = 900.0f;
+#endif
+        }
+
+        for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
+        {
+            // rows of the local tile covered by the window of this output pixel:
+            startLMj = extraCnt;
+            // [extraCnt, extraCnt + ksY); the tile holds ksY+EXTRA rows, so the
+            // window never leaves it
+            endLMj =  extraCnt + ksY;
+
+            // pixel at the centre of the window
+            currValCenter = (int)( data[ (startLMj + endLMj)/2][col+anX] );
+
+            for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
+            {
+                for(int i=-anX; i<=anX; i++)
+                {
+#if FIXED_WEIGHT
+                    weight = 1.0f;
+#else
+                    currVal = (int)(data[j][col+anX+i]);
+                    currWRTCenter = currVal-currValCenter;
+
+                    weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
+#endif
+                    tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
+                    totalWeight += weight;
+                }
+            }
+
+            tmp_sum[extraCnt] /= totalWeight;
+
+            if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
+            {
+                dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
+            }
+
+            totalWeight = 0;
+        }
+    }
+}
--- a/modules/ocl/test/test_bgfg.cpp
+++ b/modules/ocl/test/test_bgfg.cpp
@@ -0,0 +1,227 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma, jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+extern string workdir;
+//////////////////////////////////////////////////////
+// MOG
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(UseGray, bool)          // test parameter: convert frames to grayscale before feeding them
+    IMPLEMENT_PARAM_CLASS(LearningRate, double)   // test parameter: learning rate passed to the MOG update
+}
+
+// Fixture for the ocl::MOG tests, parameterised on (UseGray, LearningRate, useRoi).
+PARAM_TEST_CASE(mog, UseGray, LearningRate, bool)
+{
+    bool useGray;          // convert frames to grayscale before feeding them
+    double learningRate;   // learning rate handed to both implementations
+    bool useRoi;           // forwarded to createMat_ocl / loadMat_ocl
+
+    // Unpacks the parameter tuple into the members above.
+    virtual void SetUp()
+    {
+        useRoi = GET_PARAM(2);
+        learningRate = GET_PARAM(1);
+        useGray = GET_PARAM(0);
+    }
+};
+
+TEST_P(mog, Update)
+{
+    const std::string videoPath = std::string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture video(videoPath);
+    ASSERT_TRUE(video.isOpened());
+
+    // The first frame is read only to size the foreground mask; it is not fed to the models.
+    cv::Mat frame;
+    video >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::ocl::MOG mog;
+    cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi);
+    Ptr<cv::BackgroundSubtractorMOG> mog_gold = createBackgroundSubtractorMOG();
+    cv::Mat foreground_gold;
+
+    // Feed ten further frames to both implementations; the masks must agree exactly.
+    for (int frameIdx = 0; frameIdx < 10; ++frameIdx)
+    {
+        video >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat gray;
+            cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY);
+            cv::swap(gray, frame);
+        }
+
+        mog(loadMat_ocl(frame, useRoi), foreground, (float)learningRate);
+        mog_gold->apply(frame, foreground_gold, learningRate);
+
+        EXPECT_MAT_NEAR(foreground_gold, foreground, 0.0);
+    }
+}
+INSTANTIATE_TEST_CASE_P(OCL_Video, mog, testing::Combine(
+    testing::Values(UseGray(false), UseGray(true)),            // parameter 0: useGray
+    testing::Values(LearningRate(0.0), LearningRate(0.01)),    // parameter 1: learningRate
+    Values(true, false)));                                     // parameter 2: useRoi
+
+//////////////////////////////////////////////////////
+// MOG2
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(DetectShadow, bool)   // test parameter: enable shadow detection in both MOG2 implementations
+}
+
+// Fixture for the ocl::MOG2 tests, parameterised on (UseGray, DetectShadow, useRoi).
+PARAM_TEST_CASE(mog2, UseGray, DetectShadow, bool)
+{
+    bool useGray, detectShadow, useRoi;
+
+    virtual void SetUp()
+    {
+        useRoi = GET_PARAM(2);
+        detectShadow = GET_PARAM(1);
+        useGray = GET_PARAM(0);
+    }
+};
+
+TEST_P(mog2, Update)
+{
+    std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::ocl::MOG2 mog2;
+    mog2.bShadowDetection = detectShadow;
+    cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi);
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = createBackgroundSubtractorMOG2();
+    mog2_gold->set("detectShadows", detectShadow);
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+        mog2(loadMat_ocl(frame, useRoi), foreground);
+        mog2_gold->apply(frame, foreground_gold);
+        // Braced and terminated so the if/else is well-formed however the EXPECT_MAT_* macros expand.
+        if (detectShadow)
+        {
+            EXPECT_MAT_SIMILAR(foreground_gold, foreground, 15e-3);
+        }
+        else
+        {
+            EXPECT_MAT_NEAR(foreground_gold, foreground, 0);
+        }
+    }
+}
+
+TEST_P(mog2, getBackgroundImage)
+{
+    // The grayscale parameterisation is skipped for this test.
+    if (useGray)
+        return;
+
+    const std::string videoPath = std::string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi";
+    cv::VideoCapture video(videoPath);
+    ASSERT_TRUE(video.isOpened());
+
+    cv::Mat frame;
+    cv::ocl::MOG2 mog2;
+    mog2.bShadowDetection = detectShadow;
+    cv::ocl::oclMat foreground;
+
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = createBackgroundSubtractorMOG2();
+    mog2_gold->set("detectShadows", detectShadow);
+    cv::Mat foreground_gold;
+
+    // Train both models on the same ten frames before comparing their backgrounds.
+    for (int frameIdx = 0; frameIdx < 10; ++frameIdx)
+    {
+        video >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        mog2(loadMat_ocl(frame, useRoi), foreground);
+        mog2_gold->apply(frame, foreground_gold);
+    }
+
+    cv::ocl::oclMat background = createMat_ocl(frame.size(), frame.type(), useRoi);
+    mog2.getBackgroundImage(background);
+
+    cv::Mat background_gold;
+    mog2_gold->getBackgroundImage(background_gold);
+
+    EXPECT_MAT_NEAR(background_gold, background, 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, mog2, testing::Combine(
+    testing::Values(UseGray(true), UseGray(false)),              // parameter 0: useGray
+    testing::Values(DetectShadow(true), DetectShadow(false)),    // parameter 1: detectShadow
+    Values(true, false)));                                       // parameter 2: useRoi
+
+#endif
--- a/modules/ocl/test/test_filters.cpp
+++ b/modules/ocl/test/test_filters.cpp
@@ -353,6 +353,69 @@ TEST_P(Filter2D, Mat)
        Near(1);
    }
 }
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Bilateral
+struct Bilateral : FilterTestBase // fixture for the bilateralFilter CPU-vs-OpenCL comparison
+{
+    int type;
+    cv::Size ksize; // only ksize.width is consumed by the test body
+    int bordertype;
+    double sigmacolor, sigmaspace;
+    // Reads test parameters 0, 1 and 3; parameter 2 is not read here.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        cv::RNG &rng = TS::ptr()->get_rng();
+        sigmacolor = rng.uniform(20, 100); // drawn from the shared test RNG with bounds (20, 100)
+        sigmaspace = rng.uniform(10, 40); // drawn from the shared test RNG with bounds (10, 40)
+    }
+};
+
+TEST_P(Bilateral, Mat) // runs cv::bilateralFilter and cv::ocl::bilateralFilter with identical arguments and compares the results
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi(); // NOTE(review): presumably refreshes mat1_roi/dst_roi/gmat1/gdst — defined in FilterTestBase, confirm
+        cv::bilateralFilter(mat1_roi, dst_roi, ksize.width, sigmacolor, sigmaspace, bordertype); // CPU reference
+        cv::ocl::bilateralFilter(gmat1, gdst, ksize.width, sigmacolor, sigmaspace, bordertype); // OpenCL implementation under test
+        Near(1);
+    }
+
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// AdaptiveBilateral
+struct AdaptiveBilateral : FilterTestBase // fixture for the adaptiveBilateralFilter CPU-vs-OpenCL comparison
+{
+    int type;
+    cv::Size ksize;
+    int bordertype;
+    Point anchor;
+    virtual void SetUp() // reads test parameters 0, 1 and 3; parameter 2 is not read here
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(3);
+        Init(type);
+        anchor = Point(-1,-1); // the same anchor is passed to both implementations
+    }
+};
+
+TEST_P(AdaptiveBilateral, Mat) // runs the CPU and OpenCL adaptive bilateral filters with identical arguments and compares the results
+{
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi(); // NOTE(review): presumably refreshes mat1_roi/dst_roi/gmat1/gdst — defined in FilterTestBase, confirm
+        cv::adaptiveBilateralFilter(mat1_roi, dst_roi, ksize, 5, anchor, bordertype); // CPU reference
+        cv::ocl::adaptiveBilateralFilter(gmat1, gdst, ksize, 5, anchor, bordertype); // OpenCL implementation under test
+        Near(1);
+    }
+
+}
+
 INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
                        Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
@@ -400,4 +463,17 @@ INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
                        Values(Size(0, 0)), //not use
                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));

+INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
+                        Values(CV_8UC1, CV_8UC3), // parameter 0: matrix type
+                        Values(Size(5, 5), Size(9, 9)), // parameter 1: kernel size
+                        Values(Size(0, 0)), // parameter 2: unused placeholder
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, // parameter 3: border mode
+                               (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_WRAP, (MatType)cv::BORDER_REFLECT_101)));
+
+INSTANTIATE_TEST_CASE_P(Filter, AdaptiveBilateral, Combine(
+                        Values(CV_8UC1, CV_8UC3), // parameter 0: matrix type
+                        Values(Size(5, 5), Size(9, 9)), // parameter 1: kernel size
+                        Values(Size(0, 0)), // parameter 2: unused placeholder
+                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, // parameter 3: border mode
+                               (MatType)cv::BORDER_REFLECT,  (MatType)cv::BORDER_REFLECT_101)));
 #endif // HAVE_OPENCL
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -475,56 +475,6 @@ TEST_P(equalizeHist, Mat)
 }


-
-
-
-////////////////////////////////bilateralFilter////////////////////////////////////////////
-
-struct bilateralFilter : ImgprocTestBase {};
-
-TEST_P(bilateralFilter, Mat)
-{
-    double sigmacolor = 50.0;
-    int radius = 9;
-    int d = 2 * radius + 1;
-    double sigmaspace = 20.0;
-    int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
-    //const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
-
-    if (mat1.depth() != CV_8U || mat1.type() != dst.type())
-    {
-        cout << "Unsupported type" << endl;
-        EXPECT_DOUBLE_EQ(0.0, 0.0);
-    }
-    else
-    {
-        for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++)
-            for(int j = 0; j < LOOP_TIMES; j++)
-            {
-                random_roi();
-                if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
-                {
-                    continue;
-                }
-                //if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
-                //{
-                //	dst_roi.adjustROI(radius, radius, radius, radius);
-                //	cldst_roi.adjustROI(radius, radius, radius, radius);
-                //}
-                //else
-                //{
-                //	continue;
-                //}
-
-                cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
-                cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
-                Near(1.);
-            }
-    }
-}
-
-
-
 ////////////////////////////////copyMakeBorder////////////////////////////////////////////

 struct CopyMakeBorder : ImgprocTestBase {};
@@ -1396,14 +1346,10 @@ TEST_P(calcHist, Mat)
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // CLAHE
-namespace
-{
-    IMPLEMENT_PARAM_CLASS(ClipLimit, double)
-}

-PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)
+PARAM_TEST_CASE(CLAHE, cv::Size, double)
 {
-    cv::Size size;
+    cv::Size gridSize;
    double clipLimit;

    cv::Mat src;
@@ -1414,22 +1360,22 @@ PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)

    virtual void SetUp()
    {
-        size = GET_PARAM(0);
+        gridSize = GET_PARAM(0);
        clipLimit = GET_PARAM(1);

        cv::RNG &rng = TS::ptr()->get_rng();
-        src = randomMat(rng, size, CV_8UC1, 0, 256, false);
+        src = randomMat(rng, cv::Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false);
        g_src.upload(src);
    }
 };

 TEST_P(CLAHE, Accuracy)
 {
-    cv::Ptr<cv::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit);
+    cv::Ptr<cv::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit, gridSize);
    clahe->apply(g_src, g_dst);
    cv::Mat dst(g_dst);

-    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit);
+    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit, gridSize);
    clahe_gold->apply(src, dst_gold);

    EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
@@ -1622,21 +1568,6 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
                            NULL_TYPE,
                            Values(false))); // Values(false) is the reserved parameter

-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-//	ONE_TYPE(CV_8UC1),
-//	NULL_TYPE,
-//	ONE_TYPE(CV_8UC1),
-//	NULL_TYPE,
-//	NULL_TYPE,
-//	Values(false))); // Values(false) is the reserved parameter
-INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-                            Values(CV_8UC1, CV_8UC3),
-                            NULL_TYPE,
-                            Values(CV_8UC1, CV_8UC3),
-                            NULL_TYPE,
-                            NULL_TYPE,
-                            Values(false))); // Values(false) is the reserved parameter
-

 INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
@@ -1725,10 +1656,10 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
                            ONE_TYPE(CV_32SC1) //no use
                        ));

-INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine(
-                        Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)),
-                        Values(0.0, 40.0)));
+INSTANTIATE_TEST_CASE_P(Imgproc, CLAHE, Combine(
+                        Values(cv::Size(4, 4), cv::Size(32, 8), cv::Size(8, 64)),
+                        Values(0.0, 10.0, 62.0, 300.0)));

-INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
+INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, DIFFERENT_SIZES);

 #endif // HAVE_OPENCL
--- a/modules/ocl/test/test_optflow.cpp
+++ b/modules/ocl/test/test_optflow.cpp
@@ -164,7 +164,7 @@ TEST_P(TVL1, DISABLED_Accuracy) // TODO implementations of TV1 in video module a
    EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3);
    EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3);
 }
-INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false));
+INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(false, true));


 /////////////////////////////////////////////////////////////////////////////////////////////////
--- a/modules/ocl/test/utility.cpp
+++ b/modules/ocl/test/utility.cpp
@@ -100,6 +100,44 @@ Mat randomMat(Size size, int type, double minVal, double maxVal)
    return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false);
 }

+cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi) // returns a device matrix of the given size/type, optionally as a window of a larger one
+{
+    Size size0 = size;
+    // With useRoi, the parent allocation grows by randomInt(5, 15) in each dimension.
+    if (useRoi)
+    {
+        size0.width += randomInt(5, 15);
+        size0.height += randomInt(5, 15);
+    }
+
+    cv::ocl::oclMat d_m(size0, type);
+    // If the parent is larger than requested, return the centered size.width x size.height window of it.
+    if (size0 != size)
+        d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height));
+
+    return d_m;
+}
+
+cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi)
+{
+    // Uploads the host matrix m into a device matrix created by createMat_ocl(),
+    // so that with useRoi the result can be a window into a larger allocation.
+    // Only 8-bit 1- and 3-channel inputs are accepted.
+    CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3);
+
+    // Destination with the same size and type as m.
+    cv::ocl::oclMat d_m = createMat_ocl(m.size(), m.type(), useRoi);
+
+    // Temporary device copy of m; it is the source of the device-side copy below.
+    cv::ocl::oclMat m_ocl(m);
+
+    // Copy straight into d_m: it already is the (possibly windowed) destination,
+    // so no additional ROI header has to be built around it.
+    // NOTE(review): relies on copyTo() reusing a destination that already has
+    // the matching size/type instead of reallocating it — confirm.
+    m_ocl.copyTo(d_m);
+    return d_m;
+}
 /*
 void showDiff(InputArray gold_, InputArray actual_, double eps)
 {
--- a/modules/ocl/test/utility.hpp
+++ b/modules/ocl/test/utility.hpp
@@ -72,6 +72,9 @@ double checkNorm(const cv::Mat &m);
 double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
 double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);

+// Test helpers for device matrices. createMat_ocl allocates an oclMat; with useRoi it is a window into a larger buffer.
+cv::ocl::oclMat createMat_ocl(cv::Size size, int type, bool useRoi = false);
+cv::ocl::oclMat loadMat_ocl(const cv::Mat& m, bool useRoi = false); // uploads m (CV_8UC1 or CV_8UC3 only) into such a matrix
 #define EXPECT_MAT_NORM(mat, eps) \
 { \
    EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \
--- a/modules/optim/doc/downhill_simplex_method.rst
+++ b/modules/optim/doc/downhill_simplex_method.rst
@@ -0,0 +1,161 @@
+Downhill Simplex Method
+=======================
+
+.. highlight:: cpp
+
+optim::DownhillSolver
+---------------------------------
+
+.. ocv:class:: optim::DownhillSolver
+
+This class is used to perform the non-linear unconstrained *minimization* of a function defined on an *n*-dimensional Euclidean space,
+using the **Nelder-Mead method**, also known as the **downhill simplex method**. The basic idea of the method can be obtained from
+(`http://en.wikipedia.org/wiki/Nelder-Mead\_method <http://en.wikipedia.org/wiki/Nelder-Mead_method>`_). It should be noted that
+this method, although deterministic, is rather a heuristic and therefore may converge to a local minimum, not necessarily a global one.
+It is an iterative optimization technique, which at each step uses information about the values of the function evaluated only at
+*n+1* points, arranged as a *simplex* in *n*-dimensional space (hence the second name of the method). At each step a new point is
+chosen to evaluate the function at, the obtained value is compared with the previous ones and, based on this information, the simplex
+changes its shape, slowly moving to the local minimum.
+
+The algorithm stops when the number of function evaluations done exceeds ``termcrit.maxCount``, when the function values at the
+vertices of the simplex are within ``termcrit.epsilon`` range or when the simplex becomes so small that it
+can be enclosed in a box with ``termcrit.epsilon`` sides, whichever comes first, for some user-defined
+positive integer ``termcrit.maxCount`` and positive real ``termcrit.epsilon``.
+
+::
+
+    class CV_EXPORTS Solver : public Algorithm
+    {
+    public:
+        class CV_EXPORTS Function
+        {
+        public:
+           virtual ~Function() {}
+           //! ndim - dimensionality
+           virtual double calc(const double* x) const = 0;
+        };
+
+        virtual Ptr<Function> getFunction() const = 0;
+        virtual void setFunction(const Ptr<Function>& f) = 0;
+
+        virtual TermCriteria getTermCriteria() const = 0;
+        virtual void setTermCriteria(const TermCriteria& termcrit) = 0;
+
+        // x contains the initial point before the call and the position of the minimum (if the algorithm converged) after it. x is assumed to be
+        // (something that after getMat() will return) a row-vector or a column-vector. *Its size should
+        // be consistent with the previously given dimensionality data, if any (otherwise, it determines the dimensionality).*
+        virtual double minimize(InputOutputArray x) = 0;
+    };
+
+    class CV_EXPORTS DownhillSolver : public Solver
+    {
+    public:
+        //! returns row-vector, even if the column-vector was given
+        virtual void getInitStep(OutputArray step) const=0;
+        //! This should be called at least once before the first call to minimize(); step is assumed to be (something that
+        //! after getMat() will return) a row-vector or a column-vector. *Its dimensionality determines the dimensionality of the problem.*
+        virtual void setInitStep(InputArray step)=0;
+    };
+
+It should be noted that ``optim::DownhillSolver`` is derived from the abstract interface ``optim::Solver``, which in
+turn is derived from the ``Algorithm`` interface and is used to encapsulate the functionality common to all non-linear optimization
+algorithms in the ``optim`` module.
+
+optim::DownhillSolver::getFunction
+--------------------------------------------
+
+Getter for the optimized function. The optimized function is represented by the ``Solver::Function`` interface, which requires
+derived classes to implement the sole method ``calc(const double*)`` to evaluate the function.
+
+.. ocv:function:: Ptr<Solver::Function> optim::DownhillSolver::getFunction()
+
+    :return: Smart-pointer to an object that implements ``Solver::Function`` interface - it represents the function that is being optimized. It can be empty, if no function was given so far.
+
+optim::DownhillSolver::setFunction
+-----------------------------------------------
+
+Setter for the optimized function. *It should be called at least once before the call to* ``DownhillSolver::minimize()``, as
+default value is not usable.
+
+.. ocv:function:: void optim::DownhillSolver::setFunction(const Ptr<Solver::Function>& f)
+
+    :param f: The new function to optimize.
+
+optim::DownhillSolver::getTermCriteria
+----------------------------------------------------
+
+Getter for the previously set terminal criteria for this algorithm.
+
+.. ocv:function:: TermCriteria optim::DownhillSolver::getTermCriteria()
+
+    :return: Deep copy of the terminal criteria used at the moment.
+
+optim::DownhillSolver::setTermCriteria
+------------------------------------------
+
+Sets the terminal criteria for the downhill simplex method. Two things should be noted. First, it *is not necessary* to call this method
+before the first call to ``DownhillSolver::minimize()``, as the default value is sensible. Second, the method will raise an error
+if ``termcrit.type!=(TermCriteria::MAX_ITER+TermCriteria::EPS)``, ``termcrit.epsilon<=0`` or ``termcrit.maxCount<=0``. That is,
+both ``epsilon`` and ``maxCount`` should be set to positive values (real and integer respectively) and they represent
+the tolerance and the maximal number of function evaluations that is allowed.
+
+The algorithm stops when the number of function evaluations done exceeds ``termcrit.maxCount``, when the function values at the
+vertices of the simplex are within ``termcrit.epsilon`` range or when the simplex becomes so small that it
+can be enclosed in a box with ``termcrit.epsilon`` sides, whichever comes first.
+
+.. ocv:function:: void optim::DownhillSolver::setTermCriteria(const TermCriteria& termcrit)
+
+    :param termcrit: Terminal criteria to be used, represented as a ``TermCriteria`` structure (defined elsewhere in OpenCV). Note that it should meet ``(termcrit.type==(TermCriteria::MAX_ITER+TermCriteria::EPS) && termcrit.epsilon>0 && termcrit.maxCount>0)``, otherwise an error will be raised.
+
+optim::DownhillSolver::getInitStep
+-----------------------------------
+
+Returns the initial step that will be used in downhill simplex algorithm. See the description
+of corresponding setter (follows next) for the meaning of this parameter.
+
+.. ocv:function:: void optim::DownhillSolver::getInitStep(OutputArray step)
+
+    :param step: Initial step that will be used in algorithm. Note, that although corresponding setter accepts column-vectors as well as row-vectors, this method will return a row-vector.
+
+optim::DownhillSolver::setInitStep
+----------------------------------
+
+Sets the initial step that will be used in the downhill simplex algorithm. The step, together with the initial point (given in ``DownhillSolver::minimize``),
+are two *n*-dimensional vectors that are used to determine the shape of the initial simplex. Roughly speaking, the initial point determines the position
+of the simplex (it will become the simplex's centroid), while the step determines the spread (size in each dimension) of the simplex. To be more precise,
+if :math:`s,x_0\in\mathbb{R}^n` are the initial step and initial point respectively, the vertices of the simplex will be: :math:`v_0:=x_0-\frac{1}{2}s`
+and :math:`v_i:=x_0+s_i` for :math:`i=1,2,\dots,n`, where :math:`s_i` denotes the projection of the initial step onto the *i*-th coordinate (the result
+of the projection is treated as the vector given by :math:`s_i:=e_i\cdot\left<e_i\cdot s\right>`, where the :math:`e_i` form the canonical basis).
+
+.. ocv:function:: void optim::DownhillSolver::setInitStep(InputArray step)
+
+    :param step: Initial step that will be used in algorithm. Roughly said, it determines the spread (size in each dimension) of an initial simplex.
+
+optim::DownhillSolver::minimize
+-----------------------------------
+
+The main method of the ``DownhillSolver``. It actually runs the algorithm and performs the minimization. The sole input parameter determines the
+centroid of the starting simplex (roughly, it tells where to start), all the others (terminal criteria, initial step, function to be minimized)
+are supposed to be set via the setters before the call to this method or the default values (not always sensible) will be used.
+
+.. ocv:function:: double optim::DownhillSolver::minimize(InputOutputArray x)
+
+    :param x: The initial point, that will become a centroid of an initial simplex. After the algorithm terminates, it will be set to the point where the algorithm stops, the point of possible minimum.
+
+    :return: The value of a function at the point found.
+
+optim::createDownhillSolver
+------------------------------------
+
+This function returns a smart pointer to a ready-to-use ``DownhillSolver`` object. All the parameters are optional, so this procedure can be called
+even without parameters at all. In this case, the default values will be used. As the default value for the terminal criteria is the only sensible one,
+``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()`` should be called upon the obtained object, if the respective parameters
+were not given to ``createDownhillSolver()``. Otherwise, the two ways (give the parameters to ``createDownhillSolver()`` or leave them out and call
+``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()``) are absolutely equivalent (and will raise the same errors in the same way,
+should invalid input be detected).
+
+.. ocv:function:: Ptr<optim::DownhillSolver> optim::createDownhillSolver(const Ptr<Solver::Function>& f,InputArray initStep, TermCriteria termcrit)
+
+    :param f: Pointer to the function that will be minimized, similarly to the one you submit via ``DownhillSolver::setFunction``.
+    :param initStep: Initial step, that will be used to construct the initial simplex, similarly to the one you submit via ``DownhillSolver::setInitStep``.
+    :param termcrit: Terminal criteria to the algorithm, similarly to the one you submit via ``DownhillSolver::setTermCriteria``.
--- a/modules/optim/doc/optim.rst
+++ b/modules/optim/doc/optim.rst
@@ -8,3 +8,5 @@ optim. Generic numerical optimization
    :maxdepth: 2

    linear_programming
+    downhill_simplex_method
+    primal_dual_algorithm
--- a/modules/optim/doc/primal_dual_algorithm.rst
+++ b/modules/optim/doc/primal_dual_algorithm.rst
@@ -0,0 +1,48 @@
+Primal-Dual Algorithm
+=======================
+
+.. highlight:: cpp
+
+optim::denoise_TVL1
+---------------------------------
+
+Primal-dual algorithm is an algorithm for solving special types of variational
+problems (that is, finding a function to minimize some functional).
+As image denoising, in particular, may be seen as a variational
+problem, the primal-dual algorithm can then be used to perform denoising and this
+is exactly what is implemented.
+
+It should be noted that this implementation was taken from the July 2013 blog entry [Mordvintsev]_, which also contained
+(slightly more general) ready-to-use
+source code in Python. Subsequently, that code was rewritten in C++ using OpenCV by Vadim Pisarevsky
+at the end of July 2013 and finally it was slightly adapted by later authors.
+
+Although the thorough discussion and justification
+of the algorithm involved may be found in [ChambolleEtAl]_, it might make sense to skim over it here, following [Mordvintsev]_. To
+begin with, we consider the 1-byte gray-level images as the functions from the rectangular domain of pixels
+(it may be seen as set :math:`\left\{(x,y)\in\mathbb{N}\times\mathbb{N}\mid 1\leq x\leq n,\;1\leq y\leq m\right\}`
+for some :math:`m,\;n\in\mathbb{N}`) into :math:`\{0,1,\dots,255\}`. We shall denote the noised images as :math:`f_i` and with this
+view, given some image :math:`x` of the same size, we may measure how bad it is by the formula
+
+.. math::
+        \left\|\nabla x\right\| + \lambda\sum_i\left\|x-f_i\right\|
+
+:math:`\|\cdot\|` here denotes the :math:`L_2`-norm and, as you see, the first addend states that we want our image to be smooth
+(ideally, having zero gradient, thus being constant) and the second states that we want our result to be close to the observations we've got.
+If we treat :math:`x` as a function, this is exactly the functional that we seek to minimize, and here the Primal-Dual algorithm comes
+into play.
+
+.. ocv:function:: void optim::denoise_TVL1(const std::vector<Mat>& observations,Mat& result, double lambda, int niters)
+
+    :param observations: This array should contain one or more noised versions of the image that is to be restored.
+
+    :param result: Here the denoised image will be stored. There is no need to do pre-allocation of storage space, as it will be automatically allocated, if necessary.
+
+    :param lambda: Corresponds to :math:`\lambda` in the formulas above. As it is enlarged, detailed (but possibly noisier) images are treated more favorably than smooth (blurred) ones. Roughly speaking, as it becomes smaller, the result will be more blurred but more severe outliers will be removed.
+
+    :param niters: Number of iterations that the algorithm will run. Of course, the more iterations the better, but it is hard to quantitatively refine this statement, so just use the default and increase it if the results are poor.
+
+
+.. [ChambolleEtAl] A. Chambolle, V. Caselles, M. Novaga, D. Cremers and T. Pock, An Introduction to Total Variation for Image Analysis, http://hal.archives-ouvertes.fr/docs/00/43/75/81/PDF/preprint.pdf (pdf)
+
+.. [Mordvintsev] Alexander Mordvintsev, ROF and TV-L1 denoising with Primal-Dual algorithm, http://znah.net/rof-and-tv-l1-denoising-with-primal-dual-algorithm.html (blog entry)
--- a/modules/optim/include/opencv2/optim.hpp
+++ b/modules/optim/include/opencv2/optim.hpp
@@ -47,6 +47,45 @@

 namespace cv{namespace optim
 {
+class CV_EXPORTS Solver : public Algorithm
+{
+public:
+    class CV_EXPORTS Function
+    {
+    public:
+       virtual ~Function() {}
+       //! evaluates the function at point x; no dimensionality argument is passed (NOTE(review): presumably implied by the solver setup — confirm)
+       virtual double calc(const double* x) const = 0;
+    };
+
+    virtual Ptr<Function> getFunction() const = 0;
+    virtual void setFunction(const Ptr<Function>& f) = 0;
+
+    virtual TermCriteria getTermCriteria() const = 0;
+    virtual void setTermCriteria(const TermCriteria& termcrit) = 0;
+
+    // x contains the initial point before the call and the position of the minimum (if the algorithm converged) after it. x is assumed to be
+    // (something that after getMat() will return) a row-vector or a column-vector. *Its size should
+    // be consistent with the previously given dimensionality data, if any (otherwise, it determines the dimensionality).*
+    virtual double minimize(InputOutputArray x) = 0;
+};
+
+//! downhill simplex (Nelder-Mead) solver interface; adds the initial-step accessors to Solver
+class CV_EXPORTS DownhillSolver : public Solver
+{
+public:
+    //! returns a row-vector, even if a column-vector was given
+    virtual void getInitStep(OutputArray step) const=0;
+    //! This should be called at least once before the first call to minimize(); step is assumed to be (something that
+    //! after getMat() will return) a row-vector or a column-vector. *Its dimensionality determines the dimensionality of the problem.*
+    virtual void setInitStep(InputArray step)=0;
+};
+
+// Both minRange and minError are specified by termcrit.epsilon; in addition, the user may limit the number of iterations the algorithm performs (termcrit.maxCount).
+CV_EXPORTS_W Ptr<DownhillSolver> createDownhillSolver(const Ptr<Solver::Function>& f=Ptr<Solver::Function>(),
+        InputArray initStep=Mat_<double>(1,1,0.0),
+        TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
+
 //!the return codes for solveLP() function
 enum
 {
@@ -57,6 +96,7 @@ enum
 };

 CV_EXPORTS_W int solveLP(const Mat& Func, const Mat& Constr, Mat& z);
+CV_EXPORTS_W void denoise_TVL1(const std::vector<Mat>& observations,Mat& result, double lambda=1.0, int niters=30);
 }}// cv

 #endif
--- a/modules/optim/src/debug.hpp
+++ b/modules/optim/src/debug.hpp
@@ -0,0 +1,18 @@
+namespace cv{namespace optim{
+#ifdef ALEX_DEBUG // debug helpers are compiled in only when ALEX_DEBUG is defined before this header is included
+#define dprintf(x) printf x // call as dprintf(("fmt", args)): the inner parentheses carry printf's argument list
+static void print_matrix(const Mat& x){ // prints the type, the size and every element of x; elements are read as double
+    printf("\ttype:%d vs %d,\tsize: %d-on-%d\n",x.type(),CV_64FC1,x.rows,x.cols); // actual type next to CV_64FC1, for comparison
+    for(int i=0;i<x.rows;i++){
+        printf("\t[");
+        for(int j=0;j<x.cols;j++){
+            printf("%g, ",x.at<double>(i,j));
+        }
+        printf("]\n");
+    }
+}
+#else // without ALEX_DEBUG both helpers expand to nothing
+#define dprintf(x)
+#define print_matrix(x)
+#endif
+}}
--- a/modules/optim/src/denoise_tvl1.cpp
+++ b/modules/optim/src/denoise_tvl1.cpp
@@ -0,0 +1,128 @@
+#include "precomp.hpp"
+#undef ALEX_DEBUG
+#include "debug.hpp"
+#include <vector>
+#include <algorithm>
+
+#define ABSCLIP(val,threshold) MIN(MAX((val),-(threshold)),(threshold))
+
+namespace cv{namespace optim{
+
+    class AddFloatToCharScaled{ // binary functor computing a + scale*b for a double a and an 8-bit sample b
+        public:
+            explicit AddFloatToCharScaled(double scale):_scale(scale){} // explicit: a bare double must not silently convert to a functor
+            inline double operator()(double a,uchar b) const{ // const: calling the functor never changes its state
+                return a+_scale*((double)b);
+            }
+        private:
+            double _scale; // multiplier applied to b before it is added to a
+    };
+
+#ifndef OPENCV_NOSTL
+    using std::transform; // the standard algorithm is used unless OPENCV_NOSTL is defined
+#else // OPENCV_NOSTL: local replacement for the two-input-range form of transform
+    template <class InputIterator, class InputIterator2, class OutputIterator, class BinaryOperator>
+    static OutputIterator transform (InputIterator first1, InputIterator last1, InputIterator2 first2,
+                                     OutputIterator result, BinaryOperator binary_op)
+    {
+        while (first1 != last1) // only the first range is bounded; the second is advanced in lock-step
+        {
+            *result = binary_op(*first1, *first2);
+            ++result; ++first1; ++first2;
+        }
+        return result; // one past the last element written
+    }
+#endif
+    void denoise_TVL1(const std::vector<Mat>& observations,Mat& result, double lambda, int niters){
+        // Denoises one or more equally sized CV_8UC1 observations with niters primal-dual iterations; result is written as CV_8U.
+        CV_Assert(observations.size()>0 && niters>0 && lambda>0);
+
+        const double L2 = 8.0, tau = 0.02, sigma = 1./(L2*tau), theta = 1.0;
+        double clambda = (double)lambda;
+        double s=0;
+        const int workdepth = CV_64F;
+        // Every observation (including the first) must be CV_8UC1, because pixels are read through uchar iterators below.
+        int i, x, y, rows=observations[0].rows, cols=observations[0].cols,count;
+        for(i=0;i<(int)observations.size();i++){
+            CV_Assert(observations[i].type()==CV_8UC1 && observations[i].rows==rows && observations[i].cols==cols);
+        }
+        // X: current estimate (first observation scaled by 1/255); P: 2-channel field updated from the gradient of X; Rs: one plane per observation.
+        Mat X, P = Mat::zeros(rows, cols, CV_MAKETYPE(workdepth, 2));
+        observations[0].convertTo(X, workdepth, 1./255);
+        std::vector< Mat_<double> > Rs(observations.size());
+        for(count=0;count<(int)Rs.size();count++){
+            Rs[count]=Mat::zeros(rows,cols,workdepth);
+        }
+
+        for( i = 0; i < niters; i++ )
+        {
+            double currsigma = i == 0 ? 1 + sigma : sigma; // the very first iteration uses the larger step 1 + sigma
+
+            // P_ = P + sigma*nabla(X)
+            // P(x,y) = P_(x,y)/max(||P(x,y)||,1)
+            for( y = 0; y < rows; y++ )
+            {
+                const double* x_curr = X.ptr<double>(y);
+                const double* x_next = X.ptr<double>(std::min(y+1, rows-1)); // the last row is paired with itself, giving a zero vertical difference
+                Point2d* p_curr = P.ptr<Point2d>(y);
+                double dx, dy, m;
+                for( x = 0; x < cols-1; x++ )
+                {
+                    dx = (x_curr[x+1] - x_curr[x])*currsigma + p_curr[x].x;
+                    dy = (x_next[x] - x_curr[x])*currsigma + p_curr[x].y;
+                    m = 1.0/std::max(std::sqrt(dx*dx + dy*dy), 1.0);
+                    p_curr[x].x = dx*m;
+                    p_curr[x].y = dy*m;
+                }
+                dy = (x_next[x] - x_curr[x])*currsigma + p_curr[x].y; // last column (x == cols-1): there is no right neighbour
+                m = 1.0/std::max(std::abs(dy), 1.0);
+                p_curr[x].x = 0.0;
+                p_curr[x].y = dy*m;
+            }
+
+            // For every observation: subtract the scaled pixels, add sigma*X, then clamp to [-clambda, clambda].
+            //Rs = clip(Rs + sigma*(X-imgs), -clambda, clambda)
+            for(count=0;count<(int)Rs.size();count++){
+                transform<MatIterator_<double>,MatConstIterator_<uchar>,MatIterator_<double>,AddFloatToCharScaled>(
+                        Rs[count].begin(),Rs[count].end(),observations[count].begin<uchar>(),
+                        Rs[count].begin(),AddFloatToCharScaled(-sigma/255.0));
+                Rs[count]+=sigma*X;
+                min(Rs[count],clambda,Rs[count]);
+                max(Rs[count],-clambda,Rs[count]);
+            }
+            // Update X row by row from P and the summed Rs planes.
+            for( y = 0; y < rows; y++ )
+            {
+                double* x_curr = X.ptr<double>(y);
+                const Point2d* p_curr = P.ptr<Point2d>(y);
+                const Point2d* p_prev = P.ptr<Point2d>(std::max(y - 1, 0)); // the first row is paired with itself
+
+                // X1 = X + tau*(-nablaT(P))
+                x = 0;
+                s=0.0;
+                for(count=0;count<(int)Rs.size();count++){
+                    s=s+Rs[count](y,x);
+                }
+                double x_new = x_curr[x] + tau*(p_curr[x].y - p_prev[x].y)-tau*s; // first column: only the vertical difference of P is used
+                    // X = X2 + theta*(X2 - X)
+                x_curr[x] = x_new + theta*(x_new - x_curr[x]);
+
+                // Remaining columns additionally use the horizontal difference of P.
+                for(x = 1; x < cols; x++ )
+                {
+                    s=0.0;
+                    for(count=0;count<(int)Rs.size();count++){
+                        s+=Rs[count](y,x);
+                    }
+                        // X1 = X + tau*(-nablaT(P))
+                    x_new = x_curr[x] + tau*(p_curr[x].x - p_curr[x-1].x + p_curr[x].y - p_prev[x].y)-tau*s;
+                        // X = X2 + theta*(X2 - X)
+                    x_curr[x] = x_new + theta*(x_new - x_curr[x]);
+                }
+            }
+        }
+        // Scale the estimate back by 255 and store it as 8-bit.
+        result.create(X.rows,X.cols,CV_8U);
+        X.convertTo(result, CV_8U, 255);
+    }
+}}
--- a/modules/optim/src/lpsolver.cpp
+++ b/modules/optim/src/lpsolver.cpp
@@ -2,16 +2,12 @@
 #include <climits>
 #include <algorithm>
 #include <cstdarg>
+#include <debug.hpp>

 namespace cv{namespace optim{
 using std::vector;

 #ifdef ALEX_DEBUG
-#define dprintf(x) printf x
-static void print_matrix(const Mat& x){
-    print(x);
-    printf("\n");
-}
 static void print_simplex_state(const Mat& c,const Mat& b,double v,const std::vector<int> N,const std::vector<int> B){
    printf("\tprint simplex state\n");

@@ -32,8 +28,6 @@ static void print_simplex_state(const Mat& c,const Mat& b,double v,const std::ve
    printf("\n");
 }
 #else
-#define dprintf(x)
-#define print_matrix(x)
 #define print_simplex_state(c,b,v,N,B)
 #endif

--- a/modules/optim/src/simplex.cpp
+++ b/modules/optim/src/simplex.cpp
@@ -0,0 +1,273 @@
+#include "precomp.hpp"
+#include "debug.hpp"
+#include "opencv2/core/core_c.h"
+
+namespace cv{namespace optim{
+
+    class DownhillSolverImpl : public DownhillSolver // downhill-simplex solver; the private helpers below do the actual search
+    {
+    public:
+        void getInitStep(OutputArray step) const; // copies the stored step out
+        void setInitStep(InputArray step); // stores a deep copy of step as a single row
+        Ptr<Function> getFunction() const;
+        void setFunction(const Ptr<Function>& f);
+        TermCriteria getTermCriteria() const;
+        DownhillSolverImpl();
+        void setTermCriteria(const TermCriteria& termcrit); // requires type==MAX_ITER+EPS, epsilon>0 and maxCount>0
+        double minimize(InputOutputArray x); // runs the search; returns the function value found and writes the point back to x
+    protected:
+        Ptr<Solver::Function> _Function; // function being minimized; minimize() asserts that it is set
+        TermCriteria _termcrit; // epsilon is passed as both MinRange and MinError, maxCount as nmax
+        Mat _step; // row of doubles; its column count is used as the problem dimension
+    private:
+        inline void createInitialSimplex(Mat_<double>& simplex,Mat& step); // expands row 0 of simplex into step.cols+1 vertices
+        inline double innerDownhillSimplex(cv::Mat_<double>& p,double MinRange,double MinError,int& nfunk, // main iteration loop
+                const Ptr<Solver::Function>& f,int nmax);
+        inline double tryNewPoint(Mat_<double>& p,Mat_<double>& y,Mat_<double>& coord_sum,const Ptr<Solver::Function>& f,int ihi, // one trial move of vertex ihi
+                double fac,Mat_<double>& ptry);
+    };
+
+    double DownhillSolverImpl::tryNewPoint( // evaluates a trial replacement for vertex ihi and accepts it if it is better; returns the trial value
+        Mat_<double>& p, // simplex vertices, one per row
+        Mat_<double>& y, // function values, one per vertex
+        Mat_<double>&  coord_sum, // assumed to hold the per-column sum of p on entry (the caller fills it with reduce) - kept in sync below
+        const Ptr<Solver::Function>& f,
+        int      ihi, // index of the vertex to be moved
+        double   fac, // extrapolation factor, see the formula below
+        Mat_<double>& ptry // scratch row that receives the trial point
+        )
+    {
+        int ndim=p.cols;
+        int j;
+        double fac1,fac2,ytry;
+
+        fac1=(1.0-fac)/ndim;
+        fac2=fac1-fac;
+        for (j=0;j<ndim;j++)
+        {
+            ptry(j)=coord_sum(j)*fac1-p(ihi,j)*fac2; // equals (1-fac)*(mean of the other ndim vertices) + fac*p(ihi) when coord_sum is the column sum
+        }
+        ytry=f->calc((double*)ptry.data);
+        if (ytry < y(ihi)) // accept the trial point only if it improves on the vertex it replaces
+        {
+            y(ihi)=ytry;
+            for (j=0;j<ndim;j++)
+            {
+                coord_sum(j) += ptry(j)-p(ihi,j); // update the running sum before the old coordinate is overwritten
+                p(ihi,j)=ptry(j);
+            }
+        }
+
+        return ytry;
+    }
+
+    /*
+    Performs the actual minimization of Solver::Function f (after the initialization was done).
+    The matrix p[ndim+1][ndim] holds the ndim+1 vertices of the simplex - each row is an ndim vector.
+    The search stops when the simplex extent is <= MinRange, the spread of function values is <= MinError,
+    or nfunk >= nmax. On output, nfunk gives the number of function evaluations counted, row 0 of p holds
+    the best vertex found, and the return value is the function value at that vertex.
+    */
+    double DownhillSolverImpl::innerDownhillSimplex(
+        cv::Mat_<double>&   p,
+        double     MinRange,
+        double     MinError,
+        int&       nfunk,
+        const Ptr<Solver::Function>& f,
+        int nmax
+        )
+    {
+        int ndim=p.cols;
+        double res;
+        int i,ihi,ilo,inhi,j,mpts=ndim+1;
+        double error, range,ysave,ytry;
+        Mat_<double> coord_sum(1,ndim,0.0),buf(1,ndim,0.0),y(1,ndim+1,0.0); /* y needs one slot per vertex, i.e. ndim+1 */
+
+        nfunk = 0;
+
+        for(i=0;i<ndim+1;++i)
+        {
+            y(i) = f->calc(p[i]);
+        }
+
+        nfunk = ndim+1;
+
+        reduce(p,coord_sum,0,CV_REDUCE_SUM); /* per-coordinate sum over all vertices */
+
+        for (;;)
+        {
+            ilo=0;
+            /*  find highest (worst), next-to-worst, and lowest
+                (best) points by going through all of them. */
+            ihi = y(0)>y(1) ? (inhi=1,0) : (inhi=0,1);
+            for (i=0;i<mpts;i++)
+            {
+                if (y(i) <= y(ilo))
+                    ilo=i;
+                if (y(i) > y(ihi))
+                {
+                    inhi=ihi;
+                    ihi=i;
+                }
+                else if (y(i) > y(inhi) && i != ihi)
+                    inhi=i;
+            }
+
+            /* check stop criterion */
+            error=fabs(y(ihi)-y(ilo)); /* spread between worst and best function value */
+            range=0; /* becomes the largest per-coordinate extent of the simplex */
+            for(i=0;i<ndim;++i)
+            {
+                double min = p(0,i);
+                double max = p(0,i);
+                double d;
+                for(j=1;j<=ndim;++j)
+                {
+                    if( min > p(j,i) ) min = p(j,i);
+                    if( max < p(j,i) ) max = p(j,i);
+                }
+                d = fabs(max-min);
+                if(range < d) range = d;
+            }
+            /* Stop on convergence or once the evaluation budget nmax is used up. */
+            if(range <= MinRange || error <= MinError || nfunk >= nmax)
+            {
+                if(!(range <= MinRange || error <= MinError))
+                {
+                    dprintf(("nmax exceeded\n"));
+                }
+                /* Put best point and value in first slot, so that row 0 of p matches the returned value. */
+                std::swap(y(0),y(ilo));
+                for (i=0;i<ndim;i++)
+                {
+                    std::swap(p(0,i),p(ilo,i));
+                }
+                break;
+            }
+            nfunk += 2;
+            /*Begin a new iteration. First, reflect the worst point about the centroid of others */
+            ytry = tryNewPoint(p,y,coord_sum,f,ihi,-1.0,buf);
+            if (ytry <= y(ilo))
+            { /*If that's better than the best point, go twice as far in that direction*/
+                ytry = tryNewPoint(p,y,coord_sum,f,ihi,2.0,buf);
+            }
+            else if (ytry >= y(inhi))
+            {   /* The new point is worse than the second-highest, but better
+                  than the worst so do not go so far in that direction */
+                ysave = y(ihi);
+                ytry = tryNewPoint(p,y,coord_sum,f,ihi,0.5,buf);
+                if (ytry >= ysave)
+                { /* Can't seem to improve things. Contract the simplex to good point
+               in hope to find a simplex landscape. */
+                    for (i=0;i<mpts;i++)
+                    {
+                        if (i != ilo)
+                        {
+                            for (j=0;j<ndim;j++)
+                            {
+                                p(i,j) = coord_sum(j) = 0.5*(p(i,j)+p(ilo,j)); /* coord_sum doubles as scratch here; it is rebuilt below */
+                            }
+                            y(i)=f->calc((double*)coord_sum.data);
+                        }
+                    }
+                    nfunk += ndim;
+                    reduce(p,coord_sum,0,CV_REDUCE_SUM);
+                }
+            } else --(nfunk); /* correct nfunk */
+            dprintf(("this is simplex on iteration %d\n",nfunk));
+            print_matrix(p);
+        } /* go to next iteration. */
+        res = y(0);
+
+        return res;
+    }
+
+    void DownhillSolverImpl::createInitialSimplex(Mat_<double>& simplex,Mat& step){ // builds the starting simplex from the point stored in row 0
+        for(int i=1;i<=step.cols;++i)
+        {
+            simplex.row(0).copyTo(simplex.row(i)); // vertex i starts as a copy of row 0...
+            simplex(i,i-1)+= 0.5*step.at<double>(0,i-1); // ...moved by half a step along coordinate i-1
+        }
+        simplex.row(0) -= 0.5*step; // row 0 itself is moved by half a step the other way along every coordinate
+
+        dprintf(("this is simplex\n"));
+        print_matrix(simplex);
+    }
+
+    double DownhillSolverImpl::minimize(InputOutputArray x){ // x: in - starting point (row or column of doubles), out - point found; returns the function value there
+        dprintf(("hi from minimize\n"));
+        CV_Assert(_Function.empty()==false);
+        dprintf(("termcrit:\n\ttype: %d\n\tmaxCount: %d\n\tEPS: %g\n",_termcrit.type,_termcrit.maxCount,_termcrit.epsilon));
+        dprintf(("step\n"));
+        print_matrix(_step);
+
+        Mat x_mat=x.getMat();
+        CV_Assert(MIN(x_mat.rows,x_mat.cols)==1); // x must be a vector...
+        CV_Assert(MAX(x_mat.rows,x_mat.cols)==_step.cols); // ...with as many elements as the step has columns
+        CV_Assert(x_mat.type()==CV_64FC1);
+
+        Mat_<double> proxy_x; // x as a single row: the matrix itself for a row input, a transposed copy for a column input
+
+        if(x_mat.rows>1){
+            proxy_x=x_mat.t();
+        }else{
+            proxy_x=x_mat;
+        }
+
+        int count=0;
+        int ndim=_step.cols;
+        Mat_<double> simplex=Mat_<double>(ndim+1,ndim,0.0);
+
+        proxy_x.copyTo(simplex.row(0)); // seed row 0 with the caller's starting point; createInitialSimplex expands it from there
+        createInitialSimplex(simplex,_step);
+        double res = innerDownhillSimplex(
+                simplex,_termcrit.epsilon, _termcrit.epsilon, count,_Function,_termcrit.maxCount);
+        simplex.row(0).copyTo(proxy_x); // row 0 is taken as the result point
+
+        dprintf(("%d iterations done\n",count));
+
+        if(x_mat.rows>1){
+            Mat(x_mat.rows, 1, CV_64F, (double*)proxy_x.data).copyTo(x); // column input: write the result back in the caller's layout
+        }
+        return res;
+    }
+    DownhillSolverImpl::DownhillSolverImpl(){ // starts with no function and an empty step; _termcrit is not assigned here
+        _Function=Ptr<Function>();
+        _step=Mat_<double>();
+    }
+    Ptr<Solver::Function> DownhillSolverImpl::getFunction()const{ // returns the stored function pointer (may be empty)
+        return _Function;
+    }
+    void DownhillSolverImpl::setFunction(const Ptr<Function>& f){ // stores f without validation; minimize() asserts that it is non-empty
+        _Function=f;
+    }
+    TermCriteria DownhillSolverImpl::getTermCriteria()const{ // returns a copy of the stored termination criteria
+        return _termcrit;
+    }
+    void DownhillSolverImpl::setTermCriteria(const TermCriteria& termcrit){ // stores termcrit after validating it
+        CV_Assert(termcrit.type==(TermCriteria::MAX_ITER+TermCriteria::EPS) && termcrit.epsilon>0 && termcrit.maxCount>0); // both limits must be given and positive
+        _termcrit=termcrit;
+    }
+    // Factory for the solver. Both minRange & minError are specified by termcrit.epsilon; in addition, termcrit.maxCount limits the number of iterations that the algorithm does.
+    Ptr<DownhillSolver> createDownhillSolver(const Ptr<Solver::Function>& f, InputArray initStep, TermCriteria termcrit){
+        Ptr<DownhillSolver> DS(new DownhillSolverImpl()); // owned by Ptr from the start, so nothing leaks if a CV_Assert in a setter below throws
+        DS->setFunction(f);
+        DS->setInitStep(initStep);
+        DS->setTermCriteria(termcrit);
+        return DS;
+    }
+    void DownhillSolverImpl::getInitStep(OutputArray step)const{ // copies the stored step into the caller's array
+        _step.copyTo(step);
+    }
+    void DownhillSolverImpl::setInitStep(InputArray step){ // accepts a row or a column of doubles and stores it as a row
+        //set dimensionality and make a deep copy of step
+        Mat m=step.getMat();
+        dprintf(("m.cols=%d\nm.rows=%d\n",m.cols,m.rows));
+        CV_Assert(MIN(m.cols,m.rows)==1 && m.type()==CV_64FC1); // must be a vector of CV_64FC1
+        if(m.rows==1){
+            m.copyTo(_step); // already a row: plain copy
+        }else{
+            transpose(m,_step); // column input: store it transposed so that _step.cols is the dimension
+        }
+    }
+}}
--- a/modules/optim/test/test_denoise_tvl1.cpp
+++ b/modules/optim/test/test_denoise_tvl1.cpp
@@ -0,0 +1,80 @@
+#include "test_precomp.hpp"
+#include "opencv2/highgui.hpp"
+
+void make_noisy(const cv::Mat& img, cv::Mat& noisy, double sigma, double pepper_salt_ratio,cv::RNG& rng){ // writes a noisy version of img into noisy (two noise passes)
+    noisy.create(img.size(), img.type());
+    cv::Mat noise(img.size(), img.type()), mask(img.size(), CV_8U);
+    rng.fill(noise,cv::RNG::NORMAL,128.0,sigma); // first pass: normal noise with mean 128 and deviation sigma
+    cv::addWeighted(img, 1, noise, 1, -128, noisy); // noisy = img + noise - 128
+    cv::randn(noise, cv::Scalar::all(0), cv::Scalar::all(2)); // second pass: fresh normal noise, scaled by 255 on the next line
+    noise *= 255;
+    cv::randu(mask, 0, cvRound(1./pepper_salt_ratio)); // random mask values below cvRound(1/pepper_salt_ratio)
+    cv::Mat half = mask.colRange(0, img.cols/2); // header over the left half of the mask columns
+    half = cv::Scalar::all(1); // left half of the mask is forced to 1
+    noise.setTo(128, mask); // wherever the mask is non-zero the second-pass noise is reset to 128
+    cv::addWeighted(noisy, 1, noise, 1, -128, noisy); // noisy = noisy + noise - 128
+}
+void make_spotty(cv::Mat& img,cv::RNG& rng, int r=3,int n=1000){ // paints n squares of side r at random positions, each filled with 0 or 255
+    for(int i=0;i<n;i++){
+        int x=rng(img.cols-r),y=rng(img.rows-r); // top-left corner, drawn so the square stays inside the image
+        if(rng(2)==0){
+            img(cv::Range(y,y+r),cv::Range(x,x+r))=(uchar)0;
+        }else{
+            img(cv::Range(y,y+r),cv::Range(x,x+r))=(uchar)255;
+        }
+    }
+}
+
+bool validate_pixel(const cv::Mat& image,int x,int y,uchar val){ // prints and returns whether image.at<uchar>(x,y) equals val
+    printf("test: image(%d,%d)=%d vs %d - %s\n",x,y,(int)image.at<uchar>(x,y),val,(val==image.at<uchar>(x,y))?"true":"false");
+    return (image.at<uchar>(x,y)==val); // NOTE(review): x is passed as the first (row) index of at<>() - confirm this is intended
+}
+
+TEST(Optim_denoise_tvl1, regression_basic){ // denoises several noisy copies of one image and compares ten pixels with recorded values
+    cv::RNG rng(42); // fixed seed, so the generated noise is reproducible
+    cv::Mat img = cv::imread("lena.jpg", 0), noisy,res;
+    if(img.rows!=512 || img.cols!=512){ // also true when the file could not be read
+        printf("\tplease, put lena.jpg from samples/c in the current folder\n");
+        printf("\tnow, the test will fail...\n");
+        ASSERT_TRUE(false);
+    }
+
+    const int obs_num=5;
+    std::vector<cv::Mat> images(obs_num,cv::Mat());
+    for(int i=0;i<(int)images.size();i++){
+        make_noisy(img,images[i], 20, 0.02,rng); // each observation gets its own noise drawn from the shared rng
+        //make_spotty(images[i],rng);
+    }
+
+    //cv::imshow("test", images[0]);
+    cv::optim::denoise_TVL1(images, res);
+    //cv::imshow("denoised", res);
+    //cv::waitKey();
+
+#if 0
+    ASSERT_TRUE(validate_pixel(res,248,334,179));
+    ASSERT_TRUE(validate_pixel(res,489,333,172));
+    ASSERT_TRUE(validate_pixel(res,425,507,104));
+    ASSERT_TRUE(validate_pixel(res,489,486,105));
+    ASSERT_TRUE(validate_pixel(res,223,208,64));
+    ASSERT_TRUE(validate_pixel(res,418,3,78));
+    ASSERT_TRUE(validate_pixel(res,63,76,97));
+    ASSERT_TRUE(validate_pixel(res,29,134,126));
+    ASSERT_TRUE(validate_pixel(res,219,291,174));
+    ASSERT_TRUE(validate_pixel(res,384,124,76));
+#endif
+
+#if 1
+    ASSERT_TRUE(validate_pixel(res,248,334,194)); // active set of expected values; the disabled block above holds a different set
+    ASSERT_TRUE(validate_pixel(res,489,333,171));
+    ASSERT_TRUE(validate_pixel(res,425,507,103));
+    ASSERT_TRUE(validate_pixel(res,489,486,109));
+    ASSERT_TRUE(validate_pixel(res,223,208,72));
+    ASSERT_TRUE(validate_pixel(res,418,3,58));
+    ASSERT_TRUE(validate_pixel(res,63,76,93));
+    ASSERT_TRUE(validate_pixel(res,29,134,127));
+    ASSERT_TRUE(validate_pixel(res,219,291,180));
+    ASSERT_TRUE(validate_pixel(res,384,124,80));
+#endif
+
+}
--- a/modules/optim/test/test_downhill_simplex.cpp
+++ b/modules/optim/test/test_downhill_simplex.cpp
@@ -0,0 +1,63 @@
+#include "test_precomp.hpp"
+#include <cstdlib>
+#include <cmath>
+#include <algorithm>
+
+static void mytest(cv::Ptr<cv::optim::DownhillSolver> solver,cv::Ptr<cv::optim::Solver::Function> ptr_F,cv::Mat& x,cv::Mat& step, // runs one minimization and checks the returned value
+        cv::Mat& etalon_x,double etalon_res){
+    solver->setFunction(ptr_F);
+    int ndim=MAX(step.cols,step.rows);
+    solver->setInitStep(step);
+    cv::Mat settedStep;
+    solver->getInitStep(settedStep);
+    ASSERT_TRUE(settedStep.rows==1 && settedStep.cols==ndim); // the step must come back as a single row of ndim elements
+    ASSERT_TRUE(std::equal(step.begin<double>(),step.end<double>(),settedStep.begin<double>())); // with the same values in the same order
+    std::cout<<"step setted:\n\t"<<step<<std::endl;
+    double res=solver->minimize(x);
+    std::cout<<"res:\n\t"<<res<<std::endl;
+    std::cout<<"x:\n\t"<<x<<std::endl;
+    std::cout<<"etalon_res:\n\t"<<etalon_res<<std::endl;
+    std::cout<<"etalon_x:\n\t"<<etalon_x<<std::endl;
+    double tol=solver->getTermCriteria().epsilon; // the solver's own epsilon is used as the comparison tolerance
+    ASSERT_TRUE(std::abs(res-etalon_res)<tol); // only the function value is checked; the point comparison below is disabled
+    /*for(cv::Mat_<double>::iterator it1=x.begin<double>(),it2=etalon_x.begin<double>();it1!=x.end<double>();it1++,it2++){
+        ASSERT_TRUE(std::abs((*it1)-(*it2))<tol);
+    }*/
+    std::cout<<"--------------------------\n";
+}
+
+class SphereF:public cv::optim::Solver::Function{ // test objective: squared norm of a 2-element point
+public:
+    double calc(const double* x)const{ // reads x[0] and x[1] only
+        return x[0]*x[0]+x[1]*x[1];
+    }
+};
+class RosenbrockF:public cv::optim::Solver::Function{ // test objective: 2-D Rosenbrock function, zero at (1,1)
+    double calc(const double* x)const{ // no access specifier, so this is private here; the test only calls it through the base-class pointer
+        return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
+    }
+};
+
+TEST(Optim_Downhill, regression_basic){ // minimizes two 2-D functions with one solver instance
+    cv::Ptr<cv::optim::DownhillSolver> solver=cv::optim::createDownhillSolver(); // created with default arguments; function and step are set in mytest
+#if 1
+    {
+        cv::Ptr<cv::optim::Solver::Function> ptr_F(new SphereF());
+        cv::Mat x=(cv::Mat_<double>(1,2)<<1.0,1.0), // row-vector starting point
+            step=(cv::Mat_<double>(2,1)<<-0.5,-0.5), // column-vector step
+            etalon_x=(cv::Mat_<double>(1,2)<<-0.0,0.0);
+        double etalon_res=0.0;
+        mytest(solver,ptr_F,x,step,etalon_x,etalon_res);
+    }
+#endif
+#if 1
+    {
+        cv::Ptr<cv::optim::Solver::Function> ptr_F(new RosenbrockF());
+        cv::Mat x=(cv::Mat_<double>(2,1)<<0.0,0.0), // column-vector starting point
+            step=(cv::Mat_<double>(2,1)<<0.5,+0.5),
+            etalon_x=(cv::Mat_<double>(2,1)<<1.0,1.0);
+        double etalon_res=0.0;
+        mytest(solver,ptr_F,x,step,etalon_x,etalon_res);
+    }
+#endif
+}
--- a/modules/python/src2/cv2.cpp
+++ b/modules/python/src2/cv2.cpp
@@ -688,6 +688,23 @@ bool pyopencv_to(PyObject* obj, Point2f& p, const char* name)
    return PyArg_ParseTuple(obj, "ff", &p.x, &p.y) > 0;
 }

+template<> // Point2d specialization: fills p from a Python complex number or from a 2-tuple of floats
+bool pyopencv_to(PyObject* obj, Point2d& p, const char* name)
+{
+    (void)name; // unused
+    if(!obj || obj == Py_None) // missing argument or None: leave p untouched and report success
+        return true;
+    if(!!PyComplex_CheckExact(obj))
+    {
+        Py_complex c = PyComplex_AsCComplex(obj);
+        p.x = saturate_cast<double>(c.real); // real part -> x
+        p.y = saturate_cast<double>(c.imag); // imaginary part -> y
+        return true;
+    }
+    return PyArg_ParseTuple(obj, "dd", &p.x, &p.y) > 0; // otherwise obj is parsed as a tuple of two doubles
+}
+
+
 template<>
 PyObject* pyopencv_from(const Point& p)
 {